diff options
Diffstat (limited to 'drivers/net/ethernet')
303 files changed, 16383 insertions, 4343 deletions
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 4b85f2b74872..1cdff1dca790 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -82,6 +82,7 @@ source "drivers/net/ethernet/huawei/Kconfig" source "drivers/net/ethernet/i825xx/Kconfig" source "drivers/net/ethernet/ibm/Kconfig" source "drivers/net/ethernet/intel/Kconfig" +source "drivers/net/ethernet/microsoft/Kconfig" source "drivers/net/ethernet/xscale/Kconfig" config JME @@ -98,7 +99,8 @@ config JME config KORINA tristate "Korina (IDT RC32434) Ethernet support" - depends on MIKROTIK_RB532 + depends on MIKROTIK_RB532 || COMPILE_TEST + select MII help If you have a Mikrotik RouterBoard 500 or IDT RC32434 based system say Y. Otherwise say N. diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 9394493e8187..cb3f9084a21b 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/ obj-$(CONFIG_NET_VENDOR_IBM) += ibm/ obj-$(CONFIG_NET_VENDOR_INTEL) += intel/ obj-$(CONFIG_NET_VENDOR_I825XX) += i825xx/ +obj-$(CONFIG_NET_VENDOR_MICROSOFT) += microsoft/ obj-$(CONFIG_NET_VENDOR_XSCALE) += xscale/ obj-$(CONFIG_JME) += jme.o obj-$(CONFIG_KORINA) += korina.o diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 9c5891bbfe61..d77fafbc1530 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1449,10 +1449,10 @@ static int greth_of_probe(struct platform_device *ofdev) break; } if (i == 6) { - const u8 *addr; + u8 addr[ETH_ALEN]; - addr = of_get_mac_address(ofdev->dev.of_node); - if (!IS_ERR(addr)) { + err = of_get_mac_address(ofdev->dev.of_node, addr); + if (!err) { for (i = 0; i < 6; i++) macaddr[i] = (unsigned int) addr[i]; } else { diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 5ed80d9a6b9f..f99ae317c188 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -790,7 +790,6 @@ static int emac_probe(struct platform_device *pdev) struct emac_board_info *db; struct net_device *ndev; int ret = 0; - const char *mac_addr; ndev = alloc_etherdev(sizeof(struct emac_board_info)); if (!ndev) { @@ -853,12 +852,9 @@ static int emac_probe(struct platform_device *pdev) } /* Read MAC-address from DT */ - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - - /* Check if the MAC address is valid, if not get a random one */ - if (!is_valid_ether_addr(ndev->dev_addr)) { + ret = of_get_mac_address(np, ndev->dev_addr); + if (ret) { + /* if the MAC address is invalid get a random one */ eth_hw_addr_random(ndev); dev_warn(&pdev->dev, "using random MAC address %pM\n", ndev->dev_addr); diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 907125abef2c..1c00d719e5d7 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1351,7 +1351,6 @@ static int altera_tse_probe(struct platform_device *pdev) struct resource *control_port; struct resource *dma_res; struct altera_tse_private *priv; - const unsigned char *macaddr; void __iomem *descmap; const struct of_device_id *of_id = NULL; @@ -1525,10 +1524,8 @@ static int altera_tse_probe(struct platform_device *pdev) priv->rx_dma_buf_sz = ALTERA_RXDMABUFFER_SIZE; /* get default MAC address from device tree */ - macaddr = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(macaddr)) - ether_addr_copy(ndev->dev_addr, macaddr); - else + ret = of_get_mac_address(pdev->dev.of_node, ndev->dev_addr); + if (ret) eth_hw_addr_random(ndev); /* get phy addr and create mdio */ diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 187b0b9a6e1d..f78daba60b35 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1534,8 +1534,7 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) } pci_set_master(pdev); - ioaddr = pci_resource_start(pdev, 0); - if (!ioaddr) { + if (!pci_resource_len(pdev, 0)) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("card has no PCI IO resources, aborting\n"); err = -ENODEV; @@ -1548,6 +1547,8 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) pr_err("architecture does not support 32bit PCI busmaster DMA\n"); goto err_disable_dev; } + + ioaddr = pci_resource_start(pdev, 0); if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("io address range already allocated\n"); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index ba8321ec1ee7..3305979a9f7c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -180,9 +180,9 @@ #define XGBE_DMA_SYS_AWCR 0x30303030 /* DMA cache settings - PCI device */ -#define XGBE_DMA_PCI_ARCR 0x00000003 -#define XGBE_DMA_PCI_AWCR 0x13131313 -#define XGBE_DMA_PCI_AWARCR 0x00000313 +#define XGBE_DMA_PCI_ARCR 0x000f0f0f +#define XGBE_DMA_PCI_AWCR 0x0f0f0f0f +#define XGBE_DMA_PCI_AWARCR 0x00000f0f /* DMA channel interrupt modes */ #define XGBE_IRQ_MODE_EDGE 0 diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index b56a9e2aecd9..67b8113a2b53 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -857,7 +857,6 @@ int arc_emac_probe(struct net_device *ndev, int interface) struct device_node *phy_node; struct phy_device *phydev = NULL; struct arc_emac_priv *priv; - const char *mac_addr; unsigned int id, clock_frequency, irq; int err; @@ -942,11 +941,8 @@ int arc_emac_probe(struct net_device *ndev, int interface) } /* Get MAC address from device tree */ - mac_addr = of_get_mac_address(dev->of_node); - - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - else + err = of_get_mac_address(dev->of_node, ndev->dev_addr); + if (err) eth_hw_addr_random(ndev); arc_emac_set_address_internal(ndev); diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig index fb803bf92ded..6842b74b0696 100644 --- a/drivers/net/ethernet/atheros/Kconfig +++ b/drivers/net/ethernet/atheros/Kconfig @@ -20,6 +20,7 @@ if NET_VENDOR_ATHEROS config AG71XX tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support" depends on ATH79 + select NET_SELFTESTS select PHYLINK help If you wish to compile a kernel for AR7XXX/91XXX and enable diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 7352f98123c7..1ba81b1eb6fd 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -37,6 +37,7 @@ #include <linux/reset.h> #include <linux/clk.h> #include <linux/io.h> +#include <net/selftests.h> /* For our NAPI weight bigger does *NOT* mean better - it means more * D-cache misses and lots more wasted cycles than we'll ever @@ -497,12 +498,17 @@ static int ag71xx_ethtool_set_pauseparam(struct net_device *ndev, static void ag71xx_ethtool_get_strings(struct net_device *netdev, u32 sset, u8 *data) { - if (sset == ETH_SS_STATS) { - int i; + int i; + switch (sset) { + case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(ag71xx_statistics); i++) memcpy(data + i * ETH_GSTRING_LEN, ag71xx_statistics[i].name, ETH_GSTRING_LEN); + break; + case ETH_SS_TEST: + net_selftest_get_strings(data); + break; } } @@ -519,9 +525,14 @@ static void ag71xx_ethtool_get_stats(struct net_device *ndev, static int ag71xx_ethtool_get_sset_count(struct net_device *ndev, int sset) { - if (sset == ETH_SS_STATS) + switch (sset) { + case ETH_SS_STATS: return ARRAY_SIZE(ag71xx_statistics); - return -EOPNOTSUPP; + case ETH_SS_TEST: + return net_selftest_get_count(); + default: + return -EOPNOTSUPP; + } } static const struct ethtool_ops ag71xx_ethtool_ops = { @@ -536,6 +547,7 @@ static const struct ethtool_ops ag71xx_ethtool_ops = { .get_strings = ag71xx_ethtool_get_strings, .get_ethtool_stats = ag71xx_ethtool_get_stats, .get_sset_count = ag71xx_ethtool_get_sset_count, + .self_test = net_selftest, }; static int ag71xx_mdio_wait_busy(struct ag71xx *ag) @@ -1856,7 +1868,6 @@ static int ag71xx_probe(struct platform_device *pdev) const struct ag71xx_dcfg *dcfg; struct net_device *ndev; struct resource *res; - const void *mac_addr; int tx_size, err, i; struct ag71xx *ag; @@ -1957,10 +1968,8 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc->ctrl = 0; ag->stop_desc->next = (u32)ag->stop_desc_dma; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); - if (IS_ERR(mac_addr) || !is_valid_ether_addr(ndev->dev_addr)) { + err = of_get_mac_address(np, ndev->dev_addr); + if (err) { netif_err(ag, probe, ndev, "invalid MAC address, using random address\n"); eth_random_addr(ndev->dev_addr); } diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h index a0562a90fb6d..28ae5c16831e 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c.h +++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h @@ -367,6 +367,7 @@ struct atl1c_hw { u16 phy_id1; u16 phy_id2; + spinlock_t intr_mask_lock; /* protect the intr_mask */ u32 intr_mask; u8 preamble_len; @@ -506,6 +507,7 @@ struct atl1c_adapter { struct net_device *netdev; struct pci_dev *pdev; struct napi_struct napi; + struct napi_struct tx_napi; struct page *rx_page; unsigned int rx_page_offset; unsigned int rx_frag_size; diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index d54375b255dc..1d17c24e6d75 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -813,6 +813,7 @@ static int atl1c_sw_init(struct atl1c_adapter *adapter) atl1c_set_rxbufsize(adapter, adapter->netdev); atomic_set(&adapter->irq_sem, 1); spin_lock_init(&adapter->mdio_lock); + spin_lock_init(&adapter->hw.intr_mask_lock); set_bit(__AT_DOWN, &adapter->flags); return 0; @@ -1530,20 +1531,19 @@ static inline void atl1c_clear_phy_int(struct atl1c_adapter *adapter) spin_unlock(&adapter->mdio_lock); } -static bool atl1c_clean_tx_irq(struct atl1c_adapter *adapter, - enum atl1c_trans_queue type) +static int atl1c_clean_tx(struct napi_struct *napi, int budget) { - struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type]; + struct atl1c_adapter *adapter = + container_of(napi, struct atl1c_adapter, tx_napi); + struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[atl1c_trans_normal]; struct atl1c_buffer *buffer_info; struct pci_dev *pdev = adapter->pdev; u16 next_to_clean = atomic_read(&tpd_ring->next_to_clean); u16 hw_next_to_clean; - u16 reg; unsigned int total_bytes = 0, total_packets = 0; + unsigned long flags; - reg = type == atl1c_trans_high ? REG_TPD_PRI1_CIDX : REG_TPD_PRI0_CIDX; - - AT_READ_REGW(&adapter->hw, reg, &hw_next_to_clean); + AT_READ_REGW(&adapter->hw, REG_TPD_PRI0_CIDX, &hw_next_to_clean); while (next_to_clean != hw_next_to_clean) { buffer_info = &tpd_ring->buffer_info[next_to_clean]; @@ -1564,7 +1564,15 @@ static bool atl1c_clean_tx_irq(struct atl1c_adapter *adapter, netif_wake_queue(adapter->netdev); } - return true; + if (total_packets < budget) { + napi_complete_done(napi, total_packets); + spin_lock_irqsave(&adapter->hw.intr_mask_lock, flags); + adapter->hw.intr_mask |= ISR_TX_PKT; + AT_WRITE_REG(&adapter->hw, REG_IMR, adapter->hw.intr_mask); + spin_unlock_irqrestore(&adapter->hw.intr_mask_lock, flags); + return total_packets; + } + return budget; } /** @@ -1599,13 +1607,22 @@ static irqreturn_t atl1c_intr(int irq, void *data) AT_WRITE_REG(hw, REG_ISR, status | ISR_DIS_INT); if (status & ISR_RX_PKT) { if (likely(napi_schedule_prep(&adapter->napi))) { + spin_lock(&hw->intr_mask_lock); hw->intr_mask &= ~ISR_RX_PKT; AT_WRITE_REG(hw, REG_IMR, hw->intr_mask); + spin_unlock(&hw->intr_mask_lock); __napi_schedule(&adapter->napi); } } - if (status & ISR_TX_PKT) - atl1c_clean_tx_irq(adapter, atl1c_trans_normal); + if (status & ISR_TX_PKT) { + if (napi_schedule_prep(&adapter->tx_napi)) { + spin_lock(&hw->intr_mask_lock); + hw->intr_mask &= ~ISR_TX_PKT; + AT_WRITE_REG(hw, REG_IMR, hw->intr_mask); + spin_unlock(&hw->intr_mask_lock); + __napi_schedule(&adapter->tx_napi); + } + } handled = IRQ_HANDLED; /* check if PCIE PHY Link down */ @@ -1876,6 +1893,7 @@ static int atl1c_clean(struct napi_struct *napi, int budget) struct atl1c_adapter *adapter = container_of(napi, struct atl1c_adapter, napi); int work_done = 0; + unsigned long flags; /* Keep link state information with original netdev */ if (!netif_carrier_ok(adapter->netdev)) @@ -1886,8 +1904,10 @@ static int atl1c_clean(struct napi_struct *napi, int budget) if (work_done < budget) { quit_polling: napi_complete_done(napi, work_done); + spin_lock_irqsave(&adapter->hw.intr_mask_lock, flags); adapter->hw.intr_mask |= ISR_RX_PKT; AT_WRITE_REG(&adapter->hw, REG_IMR, adapter->hw.intr_mask); + spin_unlock_irqrestore(&adapter->hw.intr_mask_lock, flags); } return work_done; } @@ -2325,6 +2345,7 @@ static int atl1c_up(struct atl1c_adapter *adapter) atl1c_check_link_status(adapter); clear_bit(__AT_DOWN, &adapter->flags); napi_enable(&adapter->napi); + napi_enable(&adapter->tx_napi); atl1c_irq_enable(adapter); netif_start_queue(netdev); return err; @@ -2345,6 +2366,7 @@ static void atl1c_down(struct atl1c_adapter *adapter) set_bit(__AT_DOWN, &adapter->flags); netif_carrier_off(netdev); napi_disable(&adapter->napi); + napi_disable(&adapter->tx_napi); atl1c_irq_disable(adapter); atl1c_free_irq(adapter); /* disable ASPM if device inactive */ @@ -2593,7 +2615,9 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->mii.mdio_write = atl1c_mdio_write; adapter->mii.phy_id_mask = 0x1f; adapter->mii.reg_num_mask = MDIO_CTRL_REG_MASK; + dev_set_threaded(netdev, true); netif_napi_add(netdev, &adapter->napi, atl1c_clean, 64); + netif_napi_add(netdev, &adapter->tx_napi, atl1c_clean_tx, 64); timer_setup(&adapter->phy_config_timer, atl1c_phy_config, 0); /* setup the private structure */ err = atl1c_sw_init(adapter); diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index cbfed1d1477b..60d908507f51 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -181,6 +181,7 @@ static int bcm4908_dma_alloc_buf_descs(struct bcm4908_enet *enet, err_free_buf_descs: dma_free_coherent(dev, size, ring->cpu_addr, ring->dma_addr); + ring->cpu_addr = NULL; return -ENOMEM; } @@ -685,7 +686,6 @@ static int bcm4908_enet_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct net_device *netdev; struct bcm4908_enet *enet; - const u8 *mac; int err; netdev = devm_alloc_etherdev(dev, sizeof(*enet)); @@ -715,10 +715,8 @@ static int bcm4908_enet_probe(struct platform_device *pdev) return err; SET_NETDEV_DEV(netdev, &pdev->dev); - mac = of_get_mac_address(dev->of_node); - if (!IS_ERR(mac)) - ether_addr_copy(netdev->dev_addr, mac); - else + err = of_get_mac_address(dev->of_node, netdev->dev_addr); + if (err) eth_hw_addr_random(netdev); netdev->netdev_ops = &bcm4908_enet_netdev_ops; netdev->min_mtu = ETH_ZLEN; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 777bbf6d2586..d9f0f0df8f7b 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2457,7 +2457,6 @@ static int bcm_sysport_probe(struct platform_device *pdev) struct bcm_sysport_priv *priv; struct device_node *dn; struct net_device *dev; - const void *macaddr; u32 txq, rxq; int ret; @@ -2552,12 +2551,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) } /* Initialize netdevice members */ - macaddr = of_get_mac_address(dn); - if (IS_ERR(macaddr)) { + ret = of_get_mac_address(dn, dev->dev_addr); + if (ret) { dev_warn(&pdev->dev, "using random Ethernet MAC\n"); eth_hw_addr_random(dev); - } else { - ether_addr_copy(dev->dev_addr, macaddr); } SET_NETDEV_DEV(dev, &pdev->dev); diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index a5fd161ab5ee..85fa0ab7201c 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -115,7 +115,7 @@ static int bgmac_probe(struct bcma_device *core) struct ssb_sprom *sprom = &core->bus->sprom; struct mii_bus *mii_bus; struct bgmac *bgmac; - const u8 *mac = NULL; + const u8 *mac; int err; bgmac = bgmac_alloc(&core->dev); @@ -128,11 +128,10 @@ static int bgmac_probe(struct bcma_device *core) bcma_set_drvdata(core, bgmac); - if (bgmac->dev->of_node) - mac = of_get_mac_address(bgmac->dev->of_node); + err = of_get_mac_address(bgmac->dev->of_node, bgmac->net_dev->dev_addr); /* If no MAC address assigned via device tree, check SPROM */ - if (IS_ERR_OR_NULL(mac)) { + if (err) { switch (core->core_unit) { case 0: mac = sprom->et0mac; @@ -149,10 +148,9 @@ static int bgmac_probe(struct bcma_device *core) err = -ENOTSUPP; goto err; } + ether_addr_copy(bgmac->net_dev->dev_addr, mac); } - ether_addr_copy(bgmac->net_dev->dev_addr, mac); - /* On BCM4706 we need common core to access PHY */ if (core->id.id == BCMA_CORE_4706_MAC_GBIT && !core->bus->drv_gmac_cmn.core) { diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index f37f1c58f368..9834b77cf4b6 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -173,7 +173,7 @@ static int bgmac_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct bgmac *bgmac; struct resource *regs; - const u8 *mac_addr; + int ret; bgmac = bgmac_alloc(&pdev->dev); if (!bgmac) @@ -192,11 +192,10 @@ static int bgmac_probe(struct platform_device *pdev) bgmac->dev = &pdev->dev; bgmac->dma_dev = &pdev->dev; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(bgmac->net_dev->dev_addr, mac_addr); - else - dev_warn(&pdev->dev, "MAC address not present in device tree\n"); + ret = of_get_mac_address(np, bgmac->net_dev->dev_addr); + if (ret) + dev_warn(&pdev->dev, + "MAC address not present in device tree\n"); bgmac->irq = platform_get_irq(pdev, 0); if (bgmac->irq < 0) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6f13642121c4..e15d454e33f0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9532,8 +9532,8 @@ static int bnxt_try_recover_fw(struct bnxt *bp) do { sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); rc = __bnxt_hwrm_ver_get(bp, true); - if (!sts || (!BNXT_FW_IS_BOOTING(sts) && - !BNXT_FW_IS_RECOVERING(sts))) + if (!BNXT_FW_IS_BOOTING(sts) && + !BNXT_FW_IS_RECOVERING(sts)) break; retry++; } while (rc == -EBUSY && retry < BNXT_FW_RETRY); @@ -11081,6 +11081,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp) pci_disable_device(bp->pdev); } __bnxt_close_nic(bp, true, false); + bnxt_vf_reps_free(bp); bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); if (pci_is_enabled(bp->pdev)) @@ -11825,6 +11826,8 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_ulp_start(bp, rc); if (!rc) bnxt_reenable_sriov(bp); + bnxt_vf_reps_alloc(bp); + bnxt_vf_reps_open(bp); bnxt_dl_health_recovery_done(bp); bnxt_dl_health_status_update(bp, true); rtnl_unlock(); @@ -12972,6 +12975,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc); } + bnxt_inv_fw_health_reg(bp); bnxt_dl_register(bp); rc = register_netdev(dev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 2f8b193a772d..3b66e300c962 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1930,6 +1930,20 @@ static int bnxt_get_fecparam(struct net_device *dev, return 0; } +static void bnxt_get_fec_stats(struct net_device *dev, + struct ethtool_fec_stats *fec_stats) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS_EXT)) + return; + + rx = bp->rx_port_stats_ext.sw_stats; + fec_stats->corrected_bits.total = + *(rx + BNXT_RX_STATS_EXT_OFFSET(rx_corrected_bits)); +} + static u32 bnxt_ethtool_forced_fec_to_fw(struct bnxt_link_info *link_info, u32 fec) { @@ -3976,6 +3990,133 @@ ethtool_init_exit: mutex_unlock(&bp->hwrm_cmd_lock); } +static void bnxt_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS_EXT)) + return; + + rx = bp->rx_port_stats_ext.sw_stats; + phy_stats->SymbolErrorDuringCarrier = + *(rx + BNXT_RX_STATS_EXT_OFFSET(rx_pcs_symbol_err)); +} + +static void bnxt_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx, *tx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS)) + return; + + rx = bp->port_stats.sw_stats; + tx = bp->port_stats.sw_stats + BNXT_TX_PORT_STATS_BYTE_OFFSET / 8; + + mac_stats->FramesReceivedOK = + BNXT_GET_RX_PORT_STATS64(rx, rx_good_frames); + mac_stats->FramesTransmittedOK = + BNXT_GET_TX_PORT_STATS64(tx, tx_good_frames); + mac_stats->FrameCheckSequenceErrors = + BNXT_GET_RX_PORT_STATS64(rx, rx_fcs_err_frames); + mac_stats->AlignmentErrors = + BNXT_GET_RX_PORT_STATS64(rx, rx_align_err_frames); + mac_stats->OutOfRangeLengthField = + BNXT_GET_RX_PORT_STATS64(rx, rx_oor_len_frames); +} + +static void bnxt_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS)) + return; + + rx = bp->port_stats.sw_stats; + ctrl_stats->MACControlFramesReceived = + BNXT_GET_RX_PORT_STATS64(rx, rx_ctrl_frames); +} + +static const struct ethtool_rmon_hist_range bnxt_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 9216 }, + { 9217, 16383 }, + {} +}; + +static void bnxt_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx, *tx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS)) + return; + + rx = bp->port_stats.sw_stats; + tx = bp->port_stats.sw_stats + BNXT_TX_PORT_STATS_BYTE_OFFSET / 8; + + rmon_stats->jabbers = + BNXT_GET_RX_PORT_STATS64(rx, rx_jbr_frames); + rmon_stats->oversize_pkts = + BNXT_GET_RX_PORT_STATS64(rx, rx_ovrsz_frames); + rmon_stats->undersize_pkts = + BNXT_GET_RX_PORT_STATS64(rx, rx_undrsz_frames); + + rmon_stats->hist[0] = BNXT_GET_RX_PORT_STATS64(rx, rx_64b_frames); + rmon_stats->hist[1] = BNXT_GET_RX_PORT_STATS64(rx, rx_65b_127b_frames); + rmon_stats->hist[2] = BNXT_GET_RX_PORT_STATS64(rx, rx_128b_255b_frames); + rmon_stats->hist[3] = BNXT_GET_RX_PORT_STATS64(rx, rx_256b_511b_frames); + rmon_stats->hist[4] = + BNXT_GET_RX_PORT_STATS64(rx, rx_512b_1023b_frames); + rmon_stats->hist[5] = + BNXT_GET_RX_PORT_STATS64(rx, rx_1024b_1518b_frames); + rmon_stats->hist[6] = + BNXT_GET_RX_PORT_STATS64(rx, rx_1519b_2047b_frames); + rmon_stats->hist[7] = + BNXT_GET_RX_PORT_STATS64(rx, rx_2048b_4095b_frames); + rmon_stats->hist[8] = + BNXT_GET_RX_PORT_STATS64(rx, rx_4096b_9216b_frames); + rmon_stats->hist[9] = + BNXT_GET_RX_PORT_STATS64(rx, rx_9217b_16383b_frames); + + rmon_stats->hist_tx[0] = + BNXT_GET_TX_PORT_STATS64(tx, tx_64b_frames); + rmon_stats->hist_tx[1] = + BNXT_GET_TX_PORT_STATS64(tx, tx_65b_127b_frames); + rmon_stats->hist_tx[2] = + BNXT_GET_TX_PORT_STATS64(tx, tx_128b_255b_frames); + rmon_stats->hist_tx[3] = + BNXT_GET_TX_PORT_STATS64(tx, tx_256b_511b_frames); + rmon_stats->hist_tx[4] = + BNXT_GET_TX_PORT_STATS64(tx, tx_512b_1023b_frames); + rmon_stats->hist_tx[5] = + BNXT_GET_TX_PORT_STATS64(tx, tx_1024b_1518b_frames); + rmon_stats->hist_tx[6] = + BNXT_GET_TX_PORT_STATS64(tx, tx_1519b_2047b_frames); + rmon_stats->hist_tx[7] = + BNXT_GET_TX_PORT_STATS64(tx, tx_2048b_4095b_frames); + rmon_stats->hist_tx[8] = + BNXT_GET_TX_PORT_STATS64(tx, tx_4096b_9216b_frames); + rmon_stats->hist_tx[9] = + BNXT_GET_TX_PORT_STATS64(tx, tx_9217b_16383b_frames); + + *ranges = bnxt_rmon_ranges; +} + void bnxt_ethtool_free(struct bnxt *bp) { kfree(bp->test_info); @@ -3991,6 +4132,7 @@ const struct ethtool_ops bnxt_ethtool_ops = { ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, + .get_fec_stats = bnxt_get_fec_stats, .get_fecparam = bnxt_get_fecparam, .set_fecparam = bnxt_set_fecparam, .get_pause_stats = bnxt_get_pause_stats, @@ -4034,4 +4176,8 @@ const struct ethtool_ops bnxt_ethtool_ops = { .set_dump = bnxt_set_dump, .get_dump_flag = bnxt_get_dump_flag, .get_dump_data = bnxt_get_dump_data, + .get_eth_phy_stats = bnxt_get_eth_phy_stats, + .get_eth_mac_stats = bnxt_get_eth_mac_stats, + .get_eth_ctrl_stats = bnxt_get_eth_ctrl_stats, + .get_rmon_stats = bnxt_get_rmon_stats, }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 4b5c8fd76a51..dd66302343a2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -284,8 +284,26 @@ void bnxt_vf_reps_open(struct bnxt *bp) if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) return; - for (i = 0; i < pci_num_vf(bp->pdev); i++) - bnxt_vf_rep_open(bp->vf_reps[i]->dev); + for (i = 0; i < pci_num_vf(bp->pdev); i++) { + /* Open the VF-Rep only if it is allocated in the FW */ + if (bp->vf_reps[i]->tx_cfa_action != CFA_HANDLE_INVALID) + bnxt_vf_rep_open(bp->vf_reps[i]->dev); + } +} + +static void __bnxt_free_one_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep) +{ + if (!vf_rep) + return; + + if (vf_rep->dst) { + dst_release((struct dst_entry *)vf_rep->dst); + vf_rep->dst = NULL; + } + if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID) { + hwrm_cfa_vfr_free(bp, vf_rep->vf_idx); + vf_rep->tx_cfa_action = CFA_HANDLE_INVALID; + } } static void __bnxt_vf_reps_destroy(struct bnxt *bp) @@ -297,11 +315,7 @@ static void __bnxt_vf_reps_destroy(struct bnxt *bp) for (i = 0; i < num_vfs; i++) { vf_rep = bp->vf_reps[i]; if (vf_rep) { - dst_release((struct dst_entry *)vf_rep->dst); - - if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID) - hwrm_cfa_vfr_free(bp, vf_rep->vf_idx); - + __bnxt_free_one_vf_rep(bp, vf_rep); if (vf_rep->dev) { /* if register_netdev failed, then netdev_ops * would have been set to NULL @@ -350,6 +364,80 @@ void bnxt_vf_reps_destroy(struct bnxt *bp) __bnxt_vf_reps_destroy(bp); } +/* Free the VF-Reps in firmware, during firmware hot-reset processing. + * Note that the VF-Rep netdevs are still active (not unregistered) during + * this process. As the mode transition from SWITCHDEV to LEGACY happens + * under the rtnl_lock() this routine is safe under the rtnl_lock(). + */ +void bnxt_vf_reps_free(struct bnxt *bp) +{ + u16 num_vfs = pci_num_vf(bp->pdev); + int i; + + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return; + + for (i = 0; i < num_vfs; i++) + __bnxt_free_one_vf_rep(bp, bp->vf_reps[i]); +} + +static int bnxt_alloc_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, + u16 *cfa_code_map) +{ + /* get cfa handles from FW */ + if (hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx, &vf_rep->tx_cfa_action, + &vf_rep->rx_cfa_code)) + return -ENOLINK; + + cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx; + vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL); + if (!vf_rep->dst) + return -ENOMEM; + + /* only cfa_action is needed to mux a packet while TXing */ + vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action; + vf_rep->dst->u.port_info.lower_dev = bp->dev; + + return 0; +} + +/* Allocate the VF-Reps in firmware, during firmware hot-reset processing. + * Note that the VF-Rep netdevs are still active (not unregistered) during + * this process. As the mode transition from SWITCHDEV to LEGACY happens + * under the rtnl_lock() this routine is safe under the rtnl_lock(). + */ +int bnxt_vf_reps_alloc(struct bnxt *bp) +{ + u16 *cfa_code_map = bp->cfa_code_map, num_vfs = pci_num_vf(bp->pdev); + struct bnxt_vf_rep *vf_rep; + int rc, i; + + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return 0; + + if (!cfa_code_map) + return -EINVAL; + + for (i = 0; i < MAX_CFA_CODE; i++) + cfa_code_map[i] = VF_IDX_INVALID; + + for (i = 0; i < num_vfs; i++) { + vf_rep = bp->vf_reps[i]; + vf_rep->vf_idx = i; + + rc = bnxt_alloc_vf_rep(bp, vf_rep, cfa_code_map); + if (rc) + goto err; + } + + return 0; + +err: + netdev_info(bp->dev, "%s error=%d\n", __func__, rc); + bnxt_vf_reps_free(bp); + return rc; +} + /* Use the OUI of the PF's perm addr and report the same mac addr * for the same VF-rep each time */ @@ -428,25 +516,9 @@ static int bnxt_vf_reps_create(struct bnxt *bp) vf_rep->vf_idx = i; vf_rep->tx_cfa_action = CFA_HANDLE_INVALID; - /* get cfa handles from FW */ - rc = hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx, - &vf_rep->tx_cfa_action, - &vf_rep->rx_cfa_code); - if (rc) { - rc = -ENOLINK; + rc = bnxt_alloc_vf_rep(bp, vf_rep, cfa_code_map); + if (rc) goto err; - } - cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx; - - vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, - GFP_KERNEL); - if (!vf_rep->dst) { - rc = -ENOMEM; - goto err; - } - /* only cfa_action is needed to mux a packet while TXing */ - vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action; - vf_rep->dst->u.port_info.lower_dev = bp->dev; bnxt_vf_rep_netdev_init(bp, vf_rep, dev); rc = register_netdev(dev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h index d7287651422f..5637a84884d7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h @@ -19,6 +19,8 @@ void bnxt_vf_reps_close(struct bnxt *bp); void bnxt_vf_reps_open(struct bnxt *bp); void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb); struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code); +int bnxt_vf_reps_alloc(struct bnxt *bp); +void bnxt_vf_reps_free(struct bnxt *bp); static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev) { @@ -61,5 +63,15 @@ static inline bool bnxt_dev_is_vf_rep(struct net_device *dev) { return false; } + +static inline int bnxt_vf_reps_alloc(struct bnxt *bp) +{ + return 0; +} + +static inline void bnxt_vf_reps_free(struct bnxt *bp) +{ +} + #endif /* CONFIG_BNXT_SRIOV */ #endif /* BNXT_VFR_H */ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f56f3dbbc015..0e94db9cd45d 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3269,6 +3269,9 @@ static void gem_prog_cmp_regs(struct macb *bp, struct ethtool_rx_flow_spec *fs) bool cmp_b = false; bool cmp_c = false; + if (!macb_is_gem(bp)) + return; + tp4sp_v = &(fs->h_u.tcp_ip4_spec); tp4sp_m = &(fs->m_u.tcp_ip4_spec); @@ -3637,6 +3640,7 @@ static void macb_restore_features(struct macb *bp) { struct net_device *netdev = bp->dev; netdev_features_t features = netdev->features; + struct ethtool_rx_fs_item *item; /* TX checksum offload */ macb_set_txcsum_feature(bp, features); @@ -3645,6 +3649,9 @@ static void macb_restore_features(struct macb *bp) macb_set_rxcsum_feature(bp, features); /* RX Flow Filters */ + list_for_each_entry(item, &bp->rx_fs_list.list, list) + gem_prog_cmp_regs(bp, &item->fs); + macb_set_rxflow_feature(bp, features); } @@ -3939,6 +3946,7 @@ static int macb_init(struct platform_device *pdev) reg = gem_readl(bp, DCFG8); bp->max_tuples = min((GEM_BFEXT(SCR2CMP, reg) / 3), GEM_BFEXT(T2SCR, reg)); + INIT_LIST_HEAD(&bp->rx_fs_list.list); if (bp->max_tuples > 0) { /* also needs one ethtype match to check IPv4 */ if (GEM_BFEXT(SCR2ETH, reg) > 0) { @@ -3949,7 +3957,6 @@ static int macb_init(struct platform_device *pdev) /* Filtering is supported in hw but don't enable it in kernel now */ dev->hw_features |= NETIF_F_NTUPLE; /* init Rx flow definitions */ - INIT_LIST_HEAD(&bp->rx_fs_list.list); bp->rx_fs_list.count = 0; spin_lock_init(&bp->rx_fs_lock); } else @@ -4642,7 +4649,6 @@ static int macb_probe(struct platform_device *pdev) struct net_device *dev; struct resource *regs; void __iomem *mem; - const char *mac; struct macb *bp; int err, val; @@ -4757,15 +4763,11 @@ static int macb_probe(struct platform_device *pdev) if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR) bp->rx_intr_mask |= MACB_BIT(RXUBR); - mac = of_get_mac_address(np); - if (PTR_ERR(mac) == -EPROBE_DEFER) { - err = -EPROBE_DEFER; + err = of_get_mac_address(np, bp->dev->dev_addr); + if (err == -EPROBE_DEFER) goto err_out_free_netdev; - } else if (!IS_ERR_OR_NULL(mac)) { - ether_addr_copy(bp->dev->dev_addr, mac); - } else { + else if (err) macb_get_hwaddr(bp); - } err = of_get_phy_mode(np, &interface); if (err) diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h index b248966837b4..7aad40b2aa73 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h @@ -412,7 +412,7 @@ | CN6XXX_INTR_M0UNWI_ERR \ | CN6XXX_INTR_M1UPB0_ERR \ | CN6XXX_INTR_M1UPWI_ERR \ - | CN6XXX_INTR_M1UPB0_ERR \ + | CN6XXX_INTR_M1UNB0_ERR \ | CN6XXX_INTR_M1UNWI_ERR \ | CN6XXX_INTR_INSTR_DB_OF_ERR \ | CN6XXX_INTR_SLIST_DB_OF_ERR \ diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index ecffebd513be..48ff6fb0eed9 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1385,7 +1385,6 @@ static int octeon_mgmt_probe(struct platform_device *pdev) struct net_device *netdev; struct octeon_mgmt *p; const __be32 *data; - const u8 *mac; struct resource *res_mix; struct resource *res_agl; struct resource *res_agl_prt_ctl; @@ -1502,11 +1501,8 @@ static int octeon_mgmt_probe(struct platform_device *pdev) netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM; netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN; - mac = of_get_mac_address(pdev->dev.of_node); - - if (!IS_ERR(mac)) - ether_addr_copy(netdev->dev_addr, mac); - else + result = of_get_mac_address(pdev->dev.of_node, netdev->dev_addr); + if (result) eth_hw_addr_random(netdev); p->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index f782e6af45e9..50bbe79fb93d 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -776,7 +776,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG; mbx.rq.qs_num = qs->vnic_id; mbx.rq.rq_num = qidx; - mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) | + mbx.rq.cfg = ((u64)rq->caching << 26) | (rq->cq_qs << 19) | (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) | (rq->cont_qs_rbdr_idx << 8) | (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 8ff28ed04b7f..0c783aadf393 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1474,7 +1474,6 @@ static int bgx_init_of_phy(struct bgx *bgx) device_for_each_child_node(&bgx->pdev->dev, fwn) { struct phy_device *pd; struct device_node *phy_np; - const char *mac; /* Should always be an OF node. But if it is not, we * cannot handle it, so exit the loop. @@ -1483,9 +1482,7 @@ static int bgx_init_of_phy(struct bgx *bgx) if (!node) break; - mac = of_get_mac_address(node); - if (!IS_ERR(mac)) - ether_addr_copy(bgx->lmac[lmac].mac, mac); + of_get_mac_address(node, bgx->lmac[lmac].mac); SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev); bgx->lmac[lmac].lmacid = lmac; diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index ce28820c57c9..12fcf84d67ad 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -323,8 +323,7 @@ void t4_cleanup_clip_tbl(struct adapter *adap) struct clip_tbl *ctbl = adap->clipt; if (ctbl) { - if (ctbl->cl_list) - kvfree(ctbl->cl_list); + kvfree(ctbl->cl_list); kvfree(ctbl); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index d2ba40c19696..a7f291c89702 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1794,11 +1794,25 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer temp_buff = { 0 }; struct sge_qbase_reg_field *sge_qbase; struct ireg_buf *ch_sge_dbg; + u8 padap_running = 0; int i, rc; + u32 size; - rc = cudbg_get_buff(pdbg_init, dbg_buff, - sizeof(*ch_sge_dbg) * 2 + sizeof(*sge_qbase), - &temp_buff); + /* Accessing SGE_QBASE_MAP[0-3] and SGE_QBASE_INDEX regs can + * lead to SGE missing doorbells under heavy traffic. So, only + * collect them when adapter is idle. + */ + for_each_port(padap, i) { + padap_running = netif_running(padap->port[i]); + if (padap_running) + break; + } + + size = sizeof(*ch_sge_dbg) * 2; + if (!padap_running) + size += sizeof(*sge_qbase); + + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -1820,7 +1834,8 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, ch_sge_dbg++; } - if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) { + if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5 && + !padap_running) { sge_qbase = (struct sge_qbase_reg_field *)ch_sge_dbg; /* 1 addr reg SGE_QBASE_INDEX and 4 data reg * SGE_QBASE_MAP[0-3] diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 77648e4ab4cc..dd66b244466d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -157,8 +157,7 @@ static int cudbg_alloc_compress_buff(struct cudbg_init *pdbg_init) static void cudbg_free_compress_buff(struct cudbg_init *pdbg_init) { - if (pdbg_init->compress_buff) - vfree(pdbg_init->compress_buff); + vfree(pdbg_init->compress_buff); } int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index b1cae5a19839..bc581b149b11 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -174,31 +174,31 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, WORD_MASK, f->fs.nat_lip[15] | f->fs.nat_lip[14] << 8 | f->fs.nat_lip[13] << 16 | - f->fs.nat_lip[12] << 24, 1); + (u64)f->fs.nat_lip[12] << 24, 1); set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 1, WORD_MASK, f->fs.nat_lip[11] | f->fs.nat_lip[10] << 8 | f->fs.nat_lip[9] << 16 | - f->fs.nat_lip[8] << 24, 1); + (u64)f->fs.nat_lip[8] << 24, 1); set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 2, WORD_MASK, f->fs.nat_lip[7] | f->fs.nat_lip[6] << 8 | f->fs.nat_lip[5] << 16 | - f->fs.nat_lip[4] << 24, 1); + (u64)f->fs.nat_lip[4] << 24, 1); set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 3, WORD_MASK, f->fs.nat_lip[3] | f->fs.nat_lip[2] << 8 | f->fs.nat_lip[1] << 16 | - f->fs.nat_lip[0] << 24, 1); + (u64)f->fs.nat_lip[0] << 24, 1); } else { set_tcb_field(adap, f, tid, TCB_RX_FRAG3_LEN_RAW_W, WORD_MASK, f->fs.nat_lip[3] | f->fs.nat_lip[2] << 8 | f->fs.nat_lip[1] << 16 | - f->fs.nat_lip[0] << 24, 1); + (u64)f->fs.nat_lip[0] << 25, 1); } } @@ -208,25 +208,25 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, WORD_MASK, f->fs.nat_fip[15] | f->fs.nat_fip[14] << 8 | f->fs.nat_fip[13] << 16 | - f->fs.nat_fip[12] << 24, 1); + (u64)f->fs.nat_fip[12] << 24, 1); set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 1, WORD_MASK, f->fs.nat_fip[11] | f->fs.nat_fip[10] << 8 | f->fs.nat_fip[9] << 16 | - f->fs.nat_fip[8] << 24, 1); + (u64)f->fs.nat_fip[8] << 24, 1); set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 2, WORD_MASK, f->fs.nat_fip[7] | f->fs.nat_fip[6] << 8 | f->fs.nat_fip[5] << 16 | - f->fs.nat_fip[4] << 24, 1); + (u64)f->fs.nat_fip[4] << 24, 1); set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 3, WORD_MASK, f->fs.nat_fip[3] | f->fs.nat_fip[2] << 8 | f->fs.nat_fip[1] << 16 | - f->fs.nat_fip[0] << 24, 1); + (u64)f->fs.nat_fip[0] << 24, 1); } else { set_tcb_field(adap, f, tid, @@ -234,13 +234,13 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, WORD_MASK, f->fs.nat_fip[3] | f->fs.nat_fip[2] << 8 | f->fs.nat_fip[1] << 16 | - f->fs.nat_fip[0] << 24, 1); + (u64)f->fs.nat_fip[0] << 24, 1); } } set_tcb_field(adap, f, tid, TCB_PDU_HDR_LEN_W, WORD_MASK, (dp ? (nat_lp[1] | nat_lp[0] << 8) : 0) | - (sp ? (nat_fp[1] << 16 | nat_fp[0] << 24) : 0), + (sp ? (nat_fp[1] << 16 | (u64)nat_fp[0] << 24) : 0), 1); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index dede02505ceb..a5d2f84dcdd5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -524,13 +524,9 @@ struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) out_no_mem: for (i = 0; i < t->size; i++) { struct cxgb4_link *link = &t->table[i]; - - if (link->tid_map) - kvfree(link->tid_map); + kvfree(link->tid_map); } - - if (t) - kvfree(t); + kvfree(t); return NULL; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 98829e482bfa..80882cfc370f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2090,7 +2090,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1190, 0x1194, 0x11a0, 0x11a4, 0x11b0, 0x11b4, - 0x11fc, 0x1274, + 0x11fc, 0x123c, + 0x1254, 0x1274, 0x1280, 0x133c, 0x1800, 0x18fc, 0x3000, 0x302c, diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 1115b8f9ea4e..a3f5b80888e5 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -350,18 +350,6 @@ static int chcr_set_tcb_field(struct chcr_ktls_info *tx_info, u16 word, } /* - * chcr_ktls_mark_tcb_close: mark tcb state to CLOSE - * @tx_info - driver specific tls info. - * return: NET_TX_OK/NET_XMIT_DROP. - */ -static int chcr_ktls_mark_tcb_close(struct chcr_ktls_info *tx_info) -{ - return chcr_set_tcb_field(tx_info, TCB_T_STATE_W, - TCB_T_STATE_V(TCB_T_STATE_M), - CHCR_TCB_STATE_CLOSED, 1); -} - -/* * chcr_ktls_dev_del: call back for tls_dev_del. * Remove the tid and l2t entry and close the connection. * it per connection basis. @@ -395,8 +383,6 @@ static void chcr_ktls_dev_del(struct net_device *netdev, /* clear tid */ if (tx_info->tid != -1) { - /* clear tcb state and then release tid */ - chcr_ktls_mark_tcb_close(tx_info); cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tx_info->tid, tx_info->ip_family); } @@ -574,7 +560,6 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, return 0; free_tid: - chcr_ktls_mark_tcb_close(tx_info); #if IS_ENABLED(CONFIG_IPV6) /* clear clip entry */ if (tx_info->ip_family == AF_INET6) @@ -672,10 +657,6 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, if (tx_info->pending_close) { spin_unlock(&tx_info->lock); if (!status) { - /* it's a late success, tcb status is established, - * mark it close. - */ - chcr_ktls_mark_tcb_close(tx_info); cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tid, tx_info->ip_family); } @@ -1664,54 +1645,6 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, } /* - * chcr_ktls_update_snd_una: Reset the SEND_UNA. It will be done to avoid - * sending the same segment again. It will discard the segment which is before - * the current tx max. - * @tx_info - driver specific tls info. - * @q - TX queue. - * return: NET_TX_OK/NET_XMIT_DROP. - */ -static int chcr_ktls_update_snd_una(struct chcr_ktls_info *tx_info, - struct sge_eth_txq *q) -{ - struct fw_ulptx_wr *wr; - unsigned int ndesc; - int credits; - void *pos; - u32 len; - - len = sizeof(*wr) + roundup(CHCR_SET_TCB_FIELD_LEN, 16); - ndesc = DIV_ROUND_UP(len, 64); - - credits = chcr_txq_avail(&q->q) - ndesc; - if (unlikely(credits < 0)) { - chcr_eth_txq_stop(q); - return NETDEV_TX_BUSY; - } - - pos = &q->q.desc[q->q.pidx]; - - wr = pos; - /* ULPTX wr */ - wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); - wr->cookie = 0; - /* fill len in wr field */ - wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); - - pos += sizeof(*wr); - - pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, - TCB_SND_UNA_RAW_W, - TCB_SND_UNA_RAW_V(TCB_SND_UNA_RAW_M), - TCB_SND_UNA_RAW_V(0), 0); - - chcr_txq_advance(&q->q, ndesc); - cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); - - return 0; -} - -/* * chcr_end_part_handler: This handler will handle the record which * is complete or if record's end part is received. T6 adapter has a issue that * it can't send out TAG with partial record so if its an end part then we have @@ -1735,7 +1668,9 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, struct sge_eth_txq *q, u32 skb_offset, u32 tls_end_offset, bool last_wr) { + bool free_skb_if_tx_fails = false; struct sk_buff *nskb = NULL; + /* check if it is a complete record */ if (tls_end_offset == record->len) { nskb = skb; @@ -1758,6 +1693,8 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, if (last_wr) dev_kfree_skb_any(skb); + else + free_skb_if_tx_fails = true; last_wr = true; @@ -1769,6 +1706,8 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, record->num_frags, (last_wr && tcp_push_no_fin), mss)) { + if (free_skb_if_tx_fails) + dev_kfree_skb_any(skb); goto out; } tx_info->prev_seq = record->end_seq; @@ -1905,11 +1844,6 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, /* reset tcp_seq as per the prior_data_required len */ tcp_seq -= prior_data_len; } - /* reset snd una, so the middle record won't send the already - * sent part. - */ - if (chcr_ktls_update_snd_una(tx_info, q)) - goto out; atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_middle_pkts); } else { atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_start_pkts); @@ -2010,12 +1944,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) * we will send the complete record again. */ + spin_lock_irqsave(&tx_ctx->base.lock, flags); + do { - int i; cxgb4_reclaim_completed_tx(adap, &q->q, true); - /* lock taken */ - spin_lock_irqsave(&tx_ctx->base.lock, flags); /* fetch the tls record */ record = tls_get_record(&tx_ctx->base, tcp_seq, &tx_info->record_no); @@ -2074,11 +2007,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) tls_end_offset, skb_offset, 0); - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); if (ret) { /* free the refcount taken earlier */ if (tls_end_offset < data_len) dev_kfree_skb_any(skb); + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); goto out; } @@ -2088,16 +2021,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) continue; } - /* increase page reference count of the record, so that there - * won't be any chance of page free in middle if in case stack - * receives ACK and try to delete the record. - */ - for (i = 0; i < record->num_frags; i++) - __skb_frag_ref(&record->frags[i]); - /* lock cleared */ - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); - - /* if a tls record is finishing in this SKB */ if (tls_end_offset <= data_len) { ret = chcr_end_part_handler(tx_info, skb, record, @@ -2122,13 +2045,9 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) data_len = 0; } - /* clear the frag ref count which increased locally before */ - for (i = 0; i < record->num_frags; i++) { - /* clear the frag ref count */ - __skb_frag_unref(&record->frags[i]); - } /* if any failure, come out from the loop. */ if (ret) { + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); if (th->fin) dev_kfree_skb_any(skb); @@ -2143,6 +2062,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) } while (data_len > 0); + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); atomic64_inc(&port_stats->ktls_tx_encrypted_packets); atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 252adfa5d837..e7f7121821be 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1385,7 +1385,7 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) { struct dm9000_plat_data *pdata; struct device_node *np = dev->of_node; - const void *mac_addr; + int ret; if (!IS_ENABLED(CONFIG_OF) || !np) return ERR_PTR(-ENXIO); @@ -1399,11 +1399,9 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) if (of_find_property(np, "davicom,no-eeprom", NULL)) pdata->flags |= DM9000_PLATF_NO_EEPROM; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(pdata->dev_addr, mac_addr); - else if (PTR_ERR(mac_addr) == -EPROBE_DEFER) - return ERR_CAST(mac_addr); + ret = of_get_mac_address(np, pdata->dev_addr); + if (ret == -EPROBE_DEFER) + return ERR_PTR(ret); return pdata; } @@ -1471,8 +1469,10 @@ dm9000_probe(struct platform_device *pdev) /* Init network device */ ndev = alloc_etherdev(sizeof(struct board_info)); - if (!ndev) - return -ENOMEM; + if (!ndev) { + ret = -ENOMEM; + goto out_regulator_disable; + } SET_NETDEV_DEV(ndev, &pdev->dev); diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 3d9b0b161e24..e1b43b07755b 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1151,11 +1151,7 @@ static int ethoc_probe(struct platform_device *pdev) ether_addr_copy(netdev->dev_addr, pdata->hwaddr); priv->phy_id = pdata->phy_id; } else { - const void *mac; - - mac = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(mac)) - ether_addr_copy(netdev->dev_addr, mac); + of_get_mac_address(pdev->dev.of_node, netdev->dev_addr); priv->phy_id = -1; } diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index 815fb62c4b02..e3954d8835e7 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -575,7 +575,6 @@ static s32 nps_enet_probe(struct platform_device *pdev) struct net_device *ndev; struct nps_enet_priv *priv; s32 err = 0; - const char *mac_addr; if (!dev->of_node) return -ENODEV; @@ -602,10 +601,8 @@ static s32 nps_enet_probe(struct platform_device *pdev) dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs_base); /* set kernel MAC address to dev */ - mac_addr = of_get_mac_address(dev->of_node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - else + err = of_get_mac_address(dev->of_node, ndev->dev_addr); + if (err) eth_hw_addr_random(ndev); /* Get IRQ number */ diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 3f9175bdce77..3d937b4650b2 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -26,6 +26,7 @@ config FEC ARCH_MXC || SOC_IMX28 || COMPILE_TEST) default ARCH_MXC || SOC_IMX28 if ARM select CRC32 + select NET_SELFTESTS select PHYLIB imply PTP_1588_CLOCK help diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile index 644ef9ae02a3..c2ef74052ef8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Makefile +++ b/drivers/net/ethernet/freescale/dpaa2/Makefile @@ -11,7 +11,7 @@ fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpa fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o -fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o +fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o # Needed by the tracing framework CFLAGS_dpaa2-eth.o := -I$(src) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c new file mode 100644 index 000000000000..f9451ec5f2cb --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DPAA2 Ethernet Switch flower support + * + * Copyright 2021 NXP + * + */ + +#include "dpaa2-switch.h" + +static int dpaa2_switch_flower_parse_key(struct flow_cls_offload *cls, + struct dpsw_acl_key *acl_key) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + struct netlink_ext_ack *extack = cls->common.extack; + struct dpsw_acl_fields *acl_h, *acl_m; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_IP) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS))) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported keys used"); + return -EOPNOTSUPP; + } + + acl_h = &acl_key->match; + acl_m = &acl_key->mask; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + acl_h->l3_protocol = match.key->ip_proto; + acl_h->l2_ether_type = be16_to_cpu(match.key->n_proto); + acl_m->l3_protocol = match.mask->ip_proto; + acl_m->l2_ether_type = be16_to_cpu(match.mask->n_proto); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + ether_addr_copy(acl_h->l2_dest_mac, &match.key->dst[0]); + ether_addr_copy(acl_h->l2_source_mac, &match.key->src[0]); + ether_addr_copy(acl_m->l2_dest_mac, &match.mask->dst[0]); + ether_addr_copy(acl_m->l2_source_mac, &match.mask->src[0]); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + acl_h->l2_vlan_id = match.key->vlan_id; + acl_h->l2_tpid = be16_to_cpu(match.key->vlan_tpid); + acl_h->l2_pcp_dei = match.key->vlan_priority << 1 | + match.key->vlan_dei; + + acl_m->l2_vlan_id = match.mask->vlan_id; + acl_m->l2_tpid = be16_to_cpu(match.mask->vlan_tpid); + acl_m->l2_pcp_dei = match.mask->vlan_priority << 1 | + match.mask->vlan_dei; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + acl_h->l3_source_ip = be32_to_cpu(match.key->src); + acl_h->l3_dest_ip = be32_to_cpu(match.key->dst); + acl_m->l3_source_ip = be32_to_cpu(match.mask->src); + acl_m->l3_dest_ip = be32_to_cpu(match.mask->dst); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + acl_h->l4_source_port = be16_to_cpu(match.key->src); + acl_h->l4_dest_port = be16_to_cpu(match.key->dst); + acl_m->l4_source_port = be16_to_cpu(match.mask->src); + acl_m->l4_dest_port = be16_to_cpu(match.mask->dst); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; + + flow_rule_match_ip(rule, &match); + if (match.mask->ttl != 0) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL not supported"); + return -EOPNOTSUPP; + } + + if ((match.mask->tos & 0x3) != 0) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on ECN not supported, only DSCP"); + return -EOPNOTSUPP; + } + + acl_h->l3_dscp = match.key->tos >> 2; + acl_m->l3_dscp = match.mask->tos >> 2; + } + + return 0; +} + +int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry) +{ + struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg; + struct ethsw_core *ethsw = acl_tbl->ethsw; + struct dpsw_acl_key *acl_key = &entry->key; + struct device *dev = ethsw->dev; + u8 *cmd_buff; + int err; + + cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL); + if (!cmd_buff) + return -ENOMEM; + + dpsw_acl_prepare_entry_cfg(acl_key, cmd_buff); + + acl_entry_cfg->key_iova = dma_map_single(dev, cmd_buff, + DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { + dev_err(dev, "DMA mapping failed\n"); + return -EFAULT; + } + + err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, + acl_tbl->id, acl_entry_cfg); + + dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), + DMA_TO_DEVICE); + if (err) { + dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err); + return err; + } + + kfree(cmd_buff); + + return 0; +} + +static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry) +{ + struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg; + struct dpsw_acl_key *acl_key = &entry->key; + struct ethsw_core *ethsw = acl_tbl->ethsw; + struct device *dev = ethsw->dev; + u8 *cmd_buff; + int err; + + cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL); + if (!cmd_buff) + return -ENOMEM; + + dpsw_acl_prepare_entry_cfg(acl_key, cmd_buff); + + acl_entry_cfg->key_iova = dma_map_single(dev, cmd_buff, + DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { + dev_err(dev, "DMA mapping failed\n"); + return -EFAULT; + } + + err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, + acl_tbl->id, acl_entry_cfg); + + dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), + DMA_TO_DEVICE); + if (err) { + dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err); + return err; + } + + kfree(cmd_buff); + + return 0; +} + +static int +dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry) +{ + struct dpaa2_switch_acl_entry *tmp; + struct list_head *pos, *n; + int index = 0; + + if (list_empty(&acl_tbl->entries)) { + list_add(&entry->list, &acl_tbl->entries); + return index; + } + + list_for_each_safe(pos, n, &acl_tbl->entries) { + tmp = list_entry(pos, struct dpaa2_switch_acl_entry, list); + if (entry->prio < tmp->prio) + break; + index++; + } + list_add(&entry->list, pos->prev); + return index; +} + +static struct dpaa2_switch_acl_entry* +dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_acl_tbl *acl_tbl, + int index) +{ + struct dpaa2_switch_acl_entry *tmp; + int i = 0; + + list_for_each_entry(tmp, &acl_tbl->entries, list) { + if (i == index) + return tmp; + ++i; + } + + return NULL; +} + +static int +dpaa2_switch_acl_entry_set_precedence(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry, + int precedence) +{ + int err; + + err = dpaa2_switch_acl_entry_remove(acl_tbl, entry); + if (err) + return err; + + entry->cfg.precedence = precedence; + return dpaa2_switch_acl_entry_add(acl_tbl, entry); +} + +static int dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry) +{ + struct dpaa2_switch_acl_entry *tmp; + int index, i, precedence, err; + + /* Add the new ACL entry to the linked list and get its index */ + index = dpaa2_switch_acl_entry_add_to_list(acl_tbl, entry); + + /* Move up in priority the ACL entries to make space + * for the new filter. + */ + precedence = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES - acl_tbl->num_rules - 1; + for (i = 0; i < index; i++) { + tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i); + + err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp, + precedence); + if (err) + return err; + + precedence++; + } + + /* Add the new entry to hardware */ + entry->cfg.precedence = precedence; + err = dpaa2_switch_acl_entry_add(acl_tbl, entry); + acl_tbl->num_rules++; + + return err; +} + +static struct dpaa2_switch_acl_entry * +dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_acl_tbl *acl_tbl, + unsigned long cookie) +{ + struct dpaa2_switch_acl_entry *tmp, *n; + + list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) { + if (tmp->cookie == cookie) + return tmp; + } + return NULL; +} + +static int +dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry) +{ + struct dpaa2_switch_acl_entry *tmp, *n; + int index = 0; + + list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) { + if (tmp->cookie == entry->cookie) + return index; + index++; + } + return -ENOENT; +} + +static int +dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry) +{ + struct dpaa2_switch_acl_entry *tmp; + int index, i, precedence, err; + + index = dpaa2_switch_acl_entry_get_index(acl_tbl, entry); + + /* Remove from hardware the ACL entry */ + err = dpaa2_switch_acl_entry_remove(acl_tbl, entry); + if (err) + return err; + + acl_tbl->num_rules--; + + /* Remove it from the list also */ + list_del(&entry->list); + + /* Move down in priority the entries over the deleted one */ + precedence = entry->cfg.precedence; + for (i = index - 1; i >= 0; i--) { + tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i); + err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp, + precedence); + if (err) + return err; + + precedence--; + } + + kfree(entry); + + return 0; +} + +static int dpaa2_switch_tc_parse_action(struct ethsw_core *ethsw, + struct flow_action_entry *cls_act, + struct dpsw_acl_result *dpsw_act, + struct netlink_ext_ack *extack) +{ + int err = 0; + + switch (cls_act->id) { + case FLOW_ACTION_TRAP: + dpsw_act->action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF; + break; + case FLOW_ACTION_REDIRECT: + if (!dpaa2_switch_port_dev_check(cls_act->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Destination not a DPAA2 switch port"); + return -EOPNOTSUPP; + } + + dpsw_act->if_id = dpaa2_switch_get_index(ethsw, cls_act->dev); + dpsw_act->action = DPSW_ACL_ACTION_REDIRECT; + break; + case FLOW_ACTION_DROP: + dpsw_act->action = DPSW_ACL_ACTION_DROP; + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Action not supported"); + err = -EOPNOTSUPP; + goto out; + } + +out: + return err; +} + +int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl, + struct flow_cls_offload *cls) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct netlink_ext_ack *extack = cls->common.extack; + struct ethsw_core *ethsw = acl_tbl->ethsw; + struct dpaa2_switch_acl_entry *acl_entry; + struct flow_action_entry *act; + int err; + + if (!flow_offload_has_one_action(&rule->action)) { + NL_SET_ERR_MSG(extack, "Only singular actions are supported"); + return -EOPNOTSUPP; + } + + if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) { + NL_SET_ERR_MSG(extack, "Maximum filter capacity reached"); + return -ENOMEM; + } + + acl_entry = kzalloc(sizeof(*acl_entry), GFP_KERNEL); + if (!acl_entry) + return -ENOMEM; + + err = dpaa2_switch_flower_parse_key(cls, &acl_entry->key); + if (err) + goto free_acl_entry; + + act = &rule->action.entries[0]; + err = dpaa2_switch_tc_parse_action(ethsw, act, + &acl_entry->cfg.result, extack); + if (err) + goto free_acl_entry; + + acl_entry->prio = cls->common.prio; + acl_entry->cookie = cls->cookie; + + err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry); + if (err) + goto free_acl_entry; + + return 0; + +free_acl_entry: + kfree(acl_entry); + + return err; +} + +int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, + struct flow_cls_offload *cls) +{ + struct dpaa2_switch_acl_entry *entry; + + entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie); + if (!entry) + return 0; + + return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry); +} + +int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl, + struct tc_cls_matchall_offload *cls) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct ethsw_core *ethsw = acl_tbl->ethsw; + struct dpaa2_switch_acl_entry *acl_entry; + struct flow_action_entry *act; + int err; + + if (!flow_offload_has_one_action(&cls->rule->action)) { + NL_SET_ERR_MSG(extack, "Only singular actions are supported"); + return -EOPNOTSUPP; + } + + if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) { + NL_SET_ERR_MSG(extack, "Maximum filter capacity reached"); + return -ENOMEM; + } + + acl_entry = kzalloc(sizeof(*acl_entry), GFP_KERNEL); + if (!acl_entry) + return -ENOMEM; + + act = &cls->rule->action.entries[0]; + err = dpaa2_switch_tc_parse_action(ethsw, act, + &acl_entry->cfg.result, extack); + if (err) + goto free_acl_entry; + + acl_entry->prio = cls->common.prio; + acl_entry->cookie = cls->cookie; + + err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry); + if (err) + goto free_acl_entry; + + return 0; + +free_acl_entry: + kfree(acl_entry); + + return err; +} + +int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, + struct tc_cls_matchall_offload *cls) +{ + struct dpaa2_switch_acl_entry *entry; + + entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie); + if (!entry) + return 0; + + return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 80efc8116963..05de37c3b64c 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -14,6 +14,7 @@ #include <linux/kthread.h> #include <linux/workqueue.h> #include <linux/iommu.h> +#include <net/pkt_cls.h> #include <linux/fsl/mc.h> @@ -40,6 +41,17 @@ static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *e return NULL; } +static struct dpaa2_switch_acl_tbl * +dpaa2_switch_acl_tbl_get_unused(struct ethsw_core *ethsw) +{ + int i; + + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) + if (!ethsw->acls[i].in_use) + return ðsw->acls[i]; + return NULL; +} + static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv, struct net_device *bridge_dev) { @@ -1114,6 +1126,259 @@ err_exit: return NETDEV_TX_OK; } +static int +dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_acl_tbl *acl_tbl, + struct flow_cls_offload *f) +{ + switch (f->command) { + case FLOW_CLS_REPLACE: + return dpaa2_switch_cls_flower_replace(acl_tbl, f); + case FLOW_CLS_DESTROY: + return dpaa2_switch_cls_flower_destroy(acl_tbl, f); + default: + return -EOPNOTSUPP; + } +} + +static int +dpaa2_switch_setup_tc_cls_matchall(struct dpaa2_switch_acl_tbl *acl_tbl, + struct tc_cls_matchall_offload *f) +{ + switch (f->command) { + case TC_CLSMATCHALL_REPLACE: + return dpaa2_switch_cls_matchall_replace(acl_tbl, f); + case TC_CLSMATCHALL_DESTROY: + return dpaa2_switch_cls_matchall_destroy(acl_tbl, f); + default: + return -EOPNOTSUPP; + } +} + +static int dpaa2_switch_port_setup_tc_block_cb_ig(enum tc_setup_type type, + void *type_data, + void *cb_priv) +{ + switch (type) { + case TC_SETUP_CLSFLOWER: + return dpaa2_switch_setup_tc_cls_flower(cb_priv, type_data); + case TC_SETUP_CLSMATCHALL: + return dpaa2_switch_setup_tc_cls_matchall(cb_priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(dpaa2_switch_block_cb_list); + +static int dpaa2_switch_port_acl_tbl_bind(struct ethsw_port_priv *port_priv, + struct dpaa2_switch_acl_tbl *acl_tbl) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct net_device *netdev = port_priv->netdev; + struct dpsw_acl_if_cfg acl_if_cfg; + int err; + + if (port_priv->acl_tbl) + return -EINVAL; + + acl_if_cfg.if_id[0] = port_priv->idx; + acl_if_cfg.num_ifs = 1; + err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, + acl_tbl->id, &acl_if_cfg); + if (err) { + netdev_err(netdev, "dpsw_acl_add_if err %d\n", err); + return err; + } + + acl_tbl->ports |= BIT(port_priv->idx); + port_priv->acl_tbl = acl_tbl; + + return 0; +} + +static int +dpaa2_switch_port_acl_tbl_unbind(struct ethsw_port_priv *port_priv, + struct dpaa2_switch_acl_tbl *acl_tbl) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct net_device *netdev = port_priv->netdev; + struct dpsw_acl_if_cfg acl_if_cfg; + int err; + + if (port_priv->acl_tbl != acl_tbl) + return -EINVAL; + + acl_if_cfg.if_id[0] = port_priv->idx; + acl_if_cfg.num_ifs = 1; + err = dpsw_acl_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle, + acl_tbl->id, &acl_if_cfg); + if (err) { + netdev_err(netdev, "dpsw_acl_add_if err %d\n", err); + return err; + } + + acl_tbl->ports &= ~BIT(port_priv->idx); + port_priv->acl_tbl = NULL; + return 0; +} + +static int dpaa2_switch_port_block_bind(struct ethsw_port_priv *port_priv, + struct dpaa2_switch_acl_tbl *acl_tbl) +{ + struct dpaa2_switch_acl_tbl *old_acl_tbl = port_priv->acl_tbl; + int err; + + /* If the port is already bound to this ACL table then do nothing. This + * can happen when this port is the first one to join a tc block + */ + if (port_priv->acl_tbl == acl_tbl) + return 0; + + err = dpaa2_switch_port_acl_tbl_unbind(port_priv, old_acl_tbl); + if (err) + return err; + + /* Mark the previous ACL table as being unused if this was the last + * port that was using it. + */ + if (old_acl_tbl->ports == 0) + old_acl_tbl->in_use = false; + + return dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl); +} + +static int dpaa2_switch_port_block_unbind(struct ethsw_port_priv *port_priv, + struct dpaa2_switch_acl_tbl *acl_tbl) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_acl_tbl *new_acl_tbl; + int err; + + /* We are the last port that leaves a block (an ACL table). + * We'll continue to use this table. + */ + if (acl_tbl->ports == BIT(port_priv->idx)) + return 0; + + err = dpaa2_switch_port_acl_tbl_unbind(port_priv, acl_tbl); + if (err) + return err; + + if (acl_tbl->ports == 0) + acl_tbl->in_use = false; + + new_acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw); + new_acl_tbl->in_use = true; + return dpaa2_switch_port_acl_tbl_bind(port_priv, new_acl_tbl); +} + +static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev, + struct flow_block_offload *f) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_acl_tbl *acl_tbl; + struct flow_block_cb *block_cb; + bool register_block = false; + int err; + + block_cb = flow_block_cb_lookup(f->block, + dpaa2_switch_port_setup_tc_block_cb_ig, + ethsw); + + if (!block_cb) { + /* If the ACL table is not already known, then this port must + * be the first to join it. In this case, we can just continue + * to use our private table + */ + acl_tbl = port_priv->acl_tbl; + + block_cb = flow_block_cb_alloc(dpaa2_switch_port_setup_tc_block_cb_ig, + ethsw, acl_tbl, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + register_block = true; + } else { + acl_tbl = flow_block_cb_priv(block_cb); + } + + flow_block_cb_incref(block_cb); + err = dpaa2_switch_port_block_bind(port_priv, acl_tbl); + if (err) + goto err_block_bind; + + if (register_block) { + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, + &dpaa2_switch_block_cb_list); + } + + return 0; + +err_block_bind: + if (!flow_block_cb_decref(block_cb)) + flow_block_cb_free(block_cb); + return err; +} + +static void dpaa2_switch_setup_tc_block_unbind(struct net_device *netdev, + struct flow_block_offload *f) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_acl_tbl *acl_tbl; + struct flow_block_cb *block_cb; + int err; + + block_cb = flow_block_cb_lookup(f->block, + dpaa2_switch_port_setup_tc_block_cb_ig, + ethsw); + if (!block_cb) + return; + + acl_tbl = flow_block_cb_priv(block_cb); + err = dpaa2_switch_port_block_unbind(port_priv, acl_tbl); + if (!err && !flow_block_cb_decref(block_cb)) { + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + } +} + +static int dpaa2_switch_setup_tc_block(struct net_device *netdev, + struct flow_block_offload *f) +{ + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + f->driver_block_list = &dpaa2_switch_block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + return dpaa2_switch_setup_tc_block_bind(netdev, f); + case FLOW_BLOCK_UNBIND: + dpaa2_switch_setup_tc_block_unbind(netdev, f); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int dpaa2_switch_port_setup_tc(struct net_device *netdev, + enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: { + return dpaa2_switch_setup_tc_block(netdev, type_data); + } + default: + return -EOPNOTSUPP; + } + + return 0; +} + static const struct net_device_ops dpaa2_switch_port_ops = { .ndo_open = dpaa2_switch_port_open, .ndo_stop = dpaa2_switch_port_stop, @@ -1130,6 +1395,7 @@ static const struct net_device_ops dpaa2_switch_port_ops = { .ndo_start_xmit = dpaa2_switch_port_tx, .ndo_get_port_parent_id = dpaa2_switch_port_parent_id, .ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name, + .ndo_setup_tc = dpaa2_switch_port_setup_tc, }; bool dpaa2_switch_port_dev_check(const struct net_device *netdev) @@ -1832,7 +2098,7 @@ static void dpaa2_switch_event_work(struct work_struct *work) switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; if (is_unicast_ether_addr(fdb_info->addr)) err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev), @@ -1847,7 +2113,7 @@ static void dpaa2_switch_event_work(struct work_struct *work) &fdb_info->info, NULL); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; if (is_unicast_ether_addr(fdb_info->addr)) dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr); @@ -2676,61 +2942,17 @@ err_close: static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv, const char *mac) { - struct net_device *netdev = port_priv->netdev; - struct dpsw_acl_entry_cfg acl_entry_cfg; - struct dpsw_acl_fields *acl_h; - struct dpsw_acl_fields *acl_m; - struct dpsw_acl_key acl_key; - struct device *dev; - u8 *cmd_buff; - int err; - - dev = port_priv->netdev->dev.parent; - acl_h = &acl_key.match; - acl_m = &acl_key.mask; - - if (port_priv->acl_num_rules >= DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES) { - netdev_err(netdev, "ACL full\n"); - return -ENOMEM; - } - - memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg)); - memset(&acl_key, 0, sizeof(acl_key)); + struct dpaa2_switch_acl_entry acl_entry = {0}; /* Match on the destination MAC address */ - ether_addr_copy(acl_h->l2_dest_mac, mac); - eth_broadcast_addr(acl_m->l2_dest_mac); + ether_addr_copy(acl_entry.key.match.l2_dest_mac, mac); + eth_broadcast_addr(acl_entry.key.mask.l2_dest_mac); - cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL); - if (!cmd_buff) - return -ENOMEM; - dpsw_acl_prepare_entry_cfg(&acl_key, cmd_buff); - - memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg)); - acl_entry_cfg.precedence = port_priv->acl_num_rules; - acl_entry_cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF; - acl_entry_cfg.key_iova = dma_map_single(dev, cmd_buff, - DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, acl_entry_cfg.key_iova))) { - netdev_err(netdev, "DMA mapping failed\n"); - return -EFAULT; - } + /* Trap to CPU */ + acl_entry.cfg.precedence = 0; + acl_entry.cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF; - err = dpsw_acl_add_entry(port_priv->ethsw_data->mc_io, 0, - port_priv->ethsw_data->dpsw_handle, - port_priv->acl_tbl, &acl_entry_cfg); - - dma_unmap_single(dev, acl_entry_cfg.key_iova, sizeof(cmd_buff), - DMA_TO_DEVICE); - if (err) { - netdev_err(netdev, "dpsw_acl_add_entry() failed %d\n", err); - return err; - } - - port_priv->acl_num_rules++; - - return 0; + return dpaa2_switch_acl_entry_add(port_priv->acl_tbl, &acl_entry); } static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) @@ -2743,12 +2965,12 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) }; struct net_device *netdev = port_priv->netdev; struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_acl_tbl *acl_tbl; struct dpsw_fdb_cfg fdb_cfg = {0}; - struct dpsw_acl_if_cfg acl_if_cfg; struct dpsw_if_attr dpsw_if_attr; struct dpaa2_switch_fdb *fdb; struct dpsw_acl_cfg acl_cfg; - u16 fdb_id; + u16 fdb_id, acl_tbl_id; int err; /* Get the Tx queue for this specific port */ @@ -2792,21 +3014,22 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) /* Create an ACL table to be used by this switch port */ acl_cfg.max_entries = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES; err = dpsw_acl_add(ethsw->mc_io, 0, ethsw->dpsw_handle, - &port_priv->acl_tbl, &acl_cfg); + &acl_tbl_id, &acl_cfg); if (err) { netdev_err(netdev, "dpsw_acl_add err %d\n", err); return err; } - acl_if_cfg.if_id[0] = port_priv->idx; - acl_if_cfg.num_ifs = 1; - err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, - port_priv->acl_tbl, &acl_if_cfg); - if (err) { - netdev_err(netdev, "dpsw_acl_add_if err %d\n", err); - dpsw_acl_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, - port_priv->acl_tbl); - } + acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw); + acl_tbl->ethsw = ethsw; + acl_tbl->id = acl_tbl_id; + acl_tbl->in_use = true; + acl_tbl->num_rules = 0; + INIT_LIST_HEAD(&acl_tbl->entries); + + err = dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl); + if (err) + return err; err = dpaa2_switch_port_trap_mac_addr(port_priv, stpa); if (err) @@ -2858,6 +3081,7 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) } kfree(ethsw->fdbs); + kfree(ethsw->acls); kfree(ethsw->ports); dpaa2_switch_takedown(sw_dev); @@ -2915,7 +3139,9 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw, /* The DPAA2 switch's ingress path depends on the VLAN table, * thus we are not able to disable VLAN filtering. */ - port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER; + port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_STAG_FILTER | + NETIF_F_HW_TC; err = dpaa2_switch_port_init(port_priv, port_idx); if (err) @@ -2983,6 +3209,13 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) goto err_free_ports; } + ethsw->acls = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->acls), + GFP_KERNEL); + if (!ethsw->acls) { + err = -ENOMEM; + goto err_free_fdbs; + } + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { err = dpaa2_switch_probe_port(ethsw, i); if (err) @@ -3031,6 +3264,8 @@ err_stop: err_free_netdev: for (i--; i >= 0; i--) free_netdev(ethsw->ports[i]->netdev); + kfree(ethsw->acls); +err_free_fdbs: kfree(ethsw->fdbs); err_free_ports: kfree(ethsw->ports); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h index 0ae1d27c811e..bdef71f234cb 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h @@ -18,6 +18,7 @@ #include <net/switchdev.h> #include <linux/if_bridge.h> #include <linux/fsl/mc.h> +#include <net/pkt_cls.h> #include <soc/fsl/dpaa2-io.h> #include "dpsw.h" @@ -80,6 +81,8 @@ (DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN) #define DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES 16 +#define DPAA2_ETHSW_PORT_DEFAULT_TRAPS 1 + #define DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE 256 extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops; @@ -101,6 +104,34 @@ struct dpaa2_switch_fdb { bool in_use; }; +struct dpaa2_switch_acl_entry { + struct list_head list; + u16 prio; + unsigned long cookie; + + struct dpsw_acl_entry_cfg cfg; + struct dpsw_acl_key key; +}; + +struct dpaa2_switch_acl_tbl { + struct list_head entries; + struct ethsw_core *ethsw; + u64 ports; + + u16 id; + u8 num_rules; + bool in_use; +}; + +static inline bool +dpaa2_switch_acl_tbl_is_full(struct dpaa2_switch_acl_tbl *acl_tbl) +{ + if ((acl_tbl->num_rules + DPAA2_ETHSW_PORT_DEFAULT_TRAPS) >= + DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES) + return true; + return false; +} + /* Per port private data */ struct ethsw_port_priv { struct net_device *netdev; @@ -118,8 +149,7 @@ struct ethsw_port_priv { bool ucast_flood; bool learn_ena; - u16 acl_tbl; - u8 acl_num_rules; + struct dpaa2_switch_acl_tbl *acl_tbl; }; /* Switch data */ @@ -145,8 +175,21 @@ struct ethsw_core { int napi_users; struct dpaa2_switch_fdb *fdbs; + struct dpaa2_switch_acl_tbl *acls; }; +static inline int dpaa2_switch_get_index(struct ethsw_core *ethsw, + struct net_device *netdev) +{ + int i; + + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) + if (ethsw->ports[i]->netdev == netdev) + return ethsw->ports[i]->idx; + + return -EINVAL; +} + static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw) { if (ethsw->sw_attr.options & DPSW_OPT_CTRL_IF_DIS) { @@ -183,4 +226,21 @@ int dpaa2_switch_port_vlans_del(struct net_device *netdev, typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv, struct fdb_dump_entry *fdb_entry, void *data); + +/* TC offload */ + +int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl, + struct flow_cls_offload *cls); + +int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, + struct flow_cls_offload *cls); + +int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl, + struct tc_cls_matchall_offload *cls); + +int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, + struct tc_cls_matchall_offload *cls); + +int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry); #endif /* __ETHSW_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h index 1747cee19a72..cb13e740f72b 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h @@ -77,6 +77,7 @@ #define DPSW_CMDID_ACL_ADD DPSW_CMD_ID(0x090) #define DPSW_CMDID_ACL_REMOVE DPSW_CMD_ID(0x091) #define DPSW_CMDID_ACL_ADD_ENTRY DPSW_CMD_ID(0x092) +#define DPSW_CMDID_ACL_REMOVE_ENTRY DPSW_CMD_ID(0x093) #define DPSW_CMDID_ACL_ADD_IF DPSW_CMD_ID(0x094) #define DPSW_CMDID_ACL_REMOVE_IF DPSW_CMD_ID(0x095) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c index 6704efe89bc1..6352d6d1ecba 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c @@ -1544,3 +1544,38 @@ int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, return mc_send_command(mc_io, &cmd); } + +/** + * dpsw_acl_remove_entry() - Removes an entry from ACL. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: ACL ID + * @cfg: Entry configuration + * + * warning: This function has to be called after dpsw_acl_set_entry_cfg() + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_entry_cfg *cfg) +{ + struct dpsw_cmd_acl_entry *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE_ENTRY, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_acl_entry *)cmd.params; + cmd_params->acl_id = cpu_to_le16(acl_id); + cmd_params->result_if_id = cpu_to_le16(cfg->result.if_id); + cmd_params->precedence = cpu_to_le32(cfg->precedence); + cmd_params->key_iova = cpu_to_le64(cfg->key_iova); + dpsw_set_field(cmd_params->result_action, + RESULT_ACTION, + cfg->result.action); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h index 08e37c475ae8..5ef221a25b02 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h @@ -749,4 +749,7 @@ void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key, int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 acl_id, const struct dpsw_acl_entry_cfg *cfg); + +int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_entry_cfg *cfg); #endif /* __FSL_DPSW_H */ diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index ab92382c399a..d88f60c2bb82 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -2,6 +2,7 @@ config FSL_ENETC tristate "ENETC PF driver" depends on PCI && PCI_MSI + depends on FSL_ENETC_IERB || FSL_ENETC_IERB=n select FSL_ENETC_MDIO select PHYLINK select PCS_LYNX @@ -25,6 +26,14 @@ config FSL_ENETC_VF If compiled as module (M), the module name is fsl-enetc-vf. +config FSL_ENETC_IERB + tristate "ENETC IERB driver" + help + This driver configures the Integrated Endpoint Register Block on NXP + LS1028A. + + If compiled as module (M), the module name is fsl-enetc-ierb. + config FSL_ENETC_MDIO tristate "ENETC MDIO driver" depends on PCI && MDIO_DEVRES && MDIO_BUS diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile index 74f7ac253b8b..a139f2e9d59f 100644 --- a/drivers/net/ethernet/freescale/enetc/Makefile +++ b/drivers/net/ethernet/freescale/enetc/Makefile @@ -11,6 +11,9 @@ obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o fsl-enetc-vf-y := enetc_vf.o $(common-objs) fsl-enetc-vf-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o +obj-$(CONFIG_FSL_ENETC_IERB) += fsl-enetc-ierb.o +fsl-enetc-ierb-y := enetc_ierb.o + obj-$(CONFIG_FSL_ENETC_MDIO) += fsl-enetc-mdio.o fsl-enetc-mdio-y := enetc_pci_mdio.o enetc_mdio.o diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 57049ae97201..4f23829e7317 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -6,8 +6,29 @@ #include <linux/tcp.h> #include <linux/udp.h> #include <linux/vmalloc.h> +#include <linux/ptp_classify.h> #include <net/pkt_sched.h> +static int enetc_num_stack_tx_queues(struct enetc_ndev_priv *priv) +{ + int num_tx_rings = priv->num_tx_rings; + int i; + + for (i = 0; i < priv->num_rx_rings; i++) + if (priv->rx_ring[i]->xdp.prog) + return num_tx_rings - num_possible_cpus(); + + return num_tx_rings; +} + +static struct enetc_bdr *enetc_rx_ring_from_xdp_tx_ring(struct enetc_ndev_priv *priv, + struct enetc_bdr *tx_ring) +{ + int index = &priv->tx_ring[tx_ring->index] - priv->xdp_tx_ring; + + return priv->rx_ring[index]; +} + static struct sk_buff *enetc_tx_swbd_get_skb(struct enetc_tx_swbd *tx_swbd) { if (tx_swbd->is_xdp_tx || tx_swbd->is_xdp_redirect) @@ -67,16 +88,52 @@ static void enetc_update_tx_ring_tail(struct enetc_bdr *tx_ring) enetc_wr_reg_hot(tx_ring->tpir, tx_ring->next_to_use); } -static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, - int active_offloads) +static int enetc_ptp_parse(struct sk_buff *skb, u8 *udp, + u8 *msgtype, u8 *twostep, + u16 *correction_offset, u16 *body_offset) { + unsigned int ptp_class; + struct ptp_header *hdr; + unsigned int type; + u8 *base; + + ptp_class = ptp_classify_raw(skb); + if (ptp_class == PTP_CLASS_NONE) + return -EINVAL; + + hdr = ptp_parse_header(skb, ptp_class); + if (!hdr) + return -EINVAL; + + type = ptp_class & PTP_CLASS_PMASK; + if (type == PTP_CLASS_IPV4 || type == PTP_CLASS_IPV6) + *udp = 1; + else + *udp = 0; + + *msgtype = ptp_get_msgtype(hdr, ptp_class); + *twostep = hdr->flag_field[0] & 0x2; + + base = skb_mac_header(skb); + *correction_offset = (u8 *)&hdr->correction - base; + *body_offset = (u8 *)hdr + sizeof(struct ptp_header) - base; + + return 0; +} + +static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) +{ + bool do_vlan, do_onestep_tstamp = false, do_twostep_tstamp = false; + struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev); + struct enetc_hw *hw = &priv->si->hw; struct enetc_tx_swbd *tx_swbd; - skb_frag_t *frag; int len = skb_headlen(skb); union enetc_tx_bd temp_bd; + u8 msgtype, twostep, udp; union enetc_tx_bd *txbd; - bool do_vlan, do_tstamp; + u16 offset1, offset2; int i, count = 0; + skb_frag_t *frag; unsigned int f; dma_addr_t dma; u8 flags = 0; @@ -101,12 +158,21 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, count++; do_vlan = skb_vlan_tag_present(skb); - do_tstamp = (active_offloads & ENETC_F_TX_TSTAMP) && - (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP); - tx_swbd->do_tstamp = do_tstamp; - tx_swbd->check_wb = tx_swbd->do_tstamp; + if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, &offset1, + &offset2) || + msgtype != PTP_MSGTYPE_SYNC || twostep) + WARN_ONCE(1, "Bad packet for one-step timestamping\n"); + else + do_onestep_tstamp = true; + } else if (skb->cb[0] & ENETC_F_TX_TSTAMP) { + do_twostep_tstamp = true; + } + + tx_swbd->do_twostep_tstamp = do_twostep_tstamp; + tx_swbd->check_wb = tx_swbd->do_twostep_tstamp; - if (do_vlan || do_tstamp) + if (do_vlan || do_onestep_tstamp || do_twostep_tstamp) flags |= ENETC_TXBD_FLAGS_EX; if (tx_ring->tsd_enable) @@ -143,7 +209,40 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, e_flags |= ENETC_TXBD_E_FLAGS_VLAN_INS; } - if (do_tstamp) { + if (do_onestep_tstamp) { + u32 lo, hi, val; + u64 sec, nsec; + u8 *data; + + lo = enetc_rd_hot(hw, ENETC_SICTR0); + hi = enetc_rd_hot(hw, ENETC_SICTR1); + sec = (u64)hi << 32 | lo; + nsec = do_div(sec, 1000000000); + + /* Configure extension BD */ + temp_bd.ext.tstamp = cpu_to_le32(lo & 0x3fffffff); + e_flags |= ENETC_TXBD_E_FLAGS_ONE_STEP_PTP; + + /* Update originTimestamp field of Sync packet + * - 48 bits seconds field + * - 32 bits nanseconds field + */ + data = skb_mac_header(skb); + *(__be16 *)(data + offset2) = + htons((sec >> 32) & 0xffff); + *(__be32 *)(data + offset2 + 2) = + htonl(sec & 0xffffffff); + *(__be32 *)(data + offset2 + 6) = htonl(nsec); + + /* Configure single-step register */ + val = ENETC_PM0_SINGLE_STEP_EN; + val |= ENETC_SET_SINGLE_STEP_OFFSET(offset1); + if (udp) + val |= ENETC_PM0_SINGLE_STEP_CH; + + enetc_port_wr(hw, ENETC_PM0_SINGLE_STEP, val); + enetc_port_wr(hw, ENETC_PM1_SINGLE_STEP, val); + } else if (do_twostep_tstamp) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; e_flags |= ENETC_TXBD_E_FLAGS_TWO_STEP_PTP; } @@ -215,7 +314,8 @@ dma_err: return 0; } -netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, + struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_bdr *tx_ring; @@ -234,7 +334,7 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) } enetc_lock_mdio(); - count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads); + count = enetc_map_tx_buffs(tx_ring, skb); enetc_unlock_mdio(); if (unlikely(!count)) @@ -250,6 +350,40 @@ drop_packet_err: return NETDEV_TX_OK; } +netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + u8 udp, msgtype, twostep; + u16 offset1, offset2; + + /* Mark tx timestamp type on skb->cb[0] if requires */ + if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + (priv->active_offloads & ENETC_F_TX_TSTAMP_MASK)) { + skb->cb[0] = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK; + } else { + skb->cb[0] = 0; + } + + /* Fall back to two-step timestamp if not one-step Sync packet */ + if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, + &offset1, &offset2) || + msgtype != PTP_MSGTYPE_SYNC || twostep != 0) + skb->cb[0] = ENETC_F_TX_TSTAMP; + } + + /* Queue one-step Sync packet if already locked */ + if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (test_and_set_bit_lock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, + &priv->flags)) { + skb_queue_tail(&priv->tx_skbs, skb); + return NETDEV_TX_OK; + } + } + + return enetc_start_xmit(skb, ndev); +} + static irqreturn_t enetc_msix(int irq, void *data) { struct enetc_int_vector *v = data; @@ -354,7 +488,6 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring, struct enetc_tx_swbd *tx_swbd) { struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev); - struct enetc_bdr *rx_ring = priv->rx_ring[tx_ring->index]; struct enetc_rx_swbd rx_swbd = { .dma = tx_swbd->dma, .page = tx_swbd->page, @@ -362,6 +495,9 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring, .dir = tx_swbd->dir, .len = tx_swbd->len, }; + struct enetc_bdr *rx_ring; + + rx_ring = enetc_rx_ring_from_xdp_tx_ring(priv, tx_ring); if (likely(enetc_swbd_unused(rx_ring))) { enetc_reuse_page(rx_ring, &rx_swbd); @@ -390,10 +526,11 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring, static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) { struct net_device *ndev = tx_ring->ndev; + struct enetc_ndev_priv *priv = netdev_priv(ndev); int tx_frm_cnt = 0, tx_byte_cnt = 0; struct enetc_tx_swbd *tx_swbd; int i, bds_to_clean; - bool do_tstamp; + bool do_twostep_tstamp; u64 tstamp = 0; i = tx_ring->next_to_clean; @@ -401,11 +538,12 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) bds_to_clean = enetc_bd_ready_count(tx_ring, i); - do_tstamp = false; + do_twostep_tstamp = false; while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) { struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd); struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd); + bool is_eof = tx_swbd->is_eof; if (unlikely(tx_swbd->check_wb)) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -414,10 +552,10 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) txbd = ENETC_TXBD(*tx_ring, i); if (txbd->flags & ENETC_TXBD_FLAGS_W && - tx_swbd->do_tstamp) { + tx_swbd->do_twostep_tstamp) { enetc_get_tx_tstamp(&priv->si->hw, txbd, &tstamp); - do_tstamp = true; + do_twostep_tstamp = true; } } @@ -428,14 +566,19 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) if (xdp_frame) { xdp_return_frame(xdp_frame); - tx_swbd->xdp_frame = NULL; } else if (skb) { - if (unlikely(do_tstamp)) { + if (unlikely(tx_swbd->skb->cb[0] & + ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) { + /* Start work to release lock for next one-step + * timestamping packet. And send one skb in + * tx_skbs queue if has. + */ + schedule_work(&priv->tx_onestep_tstamp); + } else if (unlikely(do_twostep_tstamp)) { enetc_tstamp_tx(skb, tstamp); - do_tstamp = false; + do_twostep_tstamp = false; } napi_consume_skb(skb, napi_budget); - tx_swbd->skb = NULL; } tx_byte_cnt += tx_swbd->len; @@ -453,7 +596,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) } /* BD iteration loop end */ - if (tx_swbd->is_eof) { + if (is_eof) { tx_frm_cnt++; /* re-arm interrupt source */ enetc_wr_reg_hot(tx_ring->idr, BIT(tx_ring->index) | @@ -630,27 +773,35 @@ static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring, return rx_swbd; } +/* Reuse the current page without performing half-page buffer flipping */ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring, struct enetc_rx_swbd *rx_swbd) { - if (likely(enetc_page_reusable(rx_swbd->page))) { - size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset; + size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset; + enetc_reuse_page(rx_ring, rx_swbd); + + dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma, + rx_swbd->page_offset, + buffer_size, rx_swbd->dir); + + rx_swbd->page = NULL; +} + +/* Reuse the current page by performing half-page buffer flipping */ +static void enetc_flip_rx_buff(struct enetc_bdr *rx_ring, + struct enetc_rx_swbd *rx_swbd) +{ + if (likely(enetc_page_reusable(rx_swbd->page))) { rx_swbd->page_offset ^= ENETC_RXB_TRUESIZE; page_ref_inc(rx_swbd->page); - enetc_reuse_page(rx_ring, rx_swbd); - - /* sync for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma, - rx_swbd->page_offset, - buffer_size, rx_swbd->dir); + enetc_put_rx_buff(rx_ring, rx_swbd); } else { dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE, rx_swbd->dir); + rx_swbd->page = NULL; } - - rx_swbd->page = NULL; } static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring, @@ -670,7 +821,7 @@ static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring, skb_reserve(skb, rx_ring->buffer_offset); __skb_put(skb, size); - enetc_put_rx_buff(rx_ring, rx_swbd); + enetc_flip_rx_buff(rx_ring, rx_swbd); return skb; } @@ -683,7 +834,7 @@ static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_swbd->page, rx_swbd->page_offset, size, ENETC_RXB_TRUESIZE); - enetc_put_rx_buff(rx_ring, rx_swbd); + enetc_flip_rx_buff(rx_ring, rx_swbd); } static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring, @@ -693,12 +844,14 @@ static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring, if (likely(!(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK)))) return false; + enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]); enetc_rxbd_next(rx_ring, rxbd, i); while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { dma_rmb(); bd_status = le32_to_cpu((*rxbd)->r.lstatus); + enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]); enetc_rxbd_next(rx_ring, rxbd, i); } @@ -895,7 +1048,7 @@ static int enetc_xdp_frame_to_xdp_tx_swbd(struct enetc_bdr *tx_ring, dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(tx_ring->dev, dma))) { /* Undo the DMA mapping for all fragments */ - while (n-- >= 0) + while (--n >= 0) enetc_unmap_tx_buff(tx_ring, &xdp_tx_arr[n]); netdev_err(tx_ring->ndev, "DMA map error\n"); @@ -928,7 +1081,9 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames, int xdp_tx_bd_cnt, i, k; int xdp_tx_frm_cnt = 0; - tx_ring = priv->tx_ring[smp_processor_id()]; + enetc_lock_mdio(); + + tx_ring = priv->xdp_tx_ring[smp_processor_id()]; prefetchw(ENETC_TXBD(*tx_ring, tx_ring->next_to_use)); @@ -956,6 +1111,8 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames, tx_ring->stats.xdp_tx += xdp_tx_frm_cnt; + enetc_unlock_mdio(); + return xdp_tx_frm_cnt; } @@ -1021,24 +1178,8 @@ static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status, } } -/* Reuse the current page without performing half-page buffer flipping */ -static void enetc_put_xdp_buff(struct enetc_bdr *rx_ring, - struct enetc_rx_swbd *rx_swbd) -{ - enetc_reuse_page(rx_ring, rx_swbd); - - dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma, - rx_swbd->page_offset, - ENETC_RXB_DMA_SIZE_XDP, - rx_swbd->dir); - - rx_swbd->page = NULL; -} - /* Convert RX buffer descriptors to TX buffer descriptors. These will be - * recycled back into the RX ring in enetc_clean_tx_ring. We need to scrub the - * RX software BDs because the ownership of the buffer no longer belongs to the - * RX ring, so enetc_refill_rx_ring may not reuse rx_swbd->page. + * recycled back into the RX ring in enetc_clean_tx_ring. */ static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr, struct enetc_bdr *rx_ring, @@ -1060,7 +1201,6 @@ static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr, tx_swbd->is_dma_page = true; tx_swbd->is_xdp_tx = true; tx_swbd->is_eof = false; - memset(rx_swbd, 0, sizeof(*rx_swbd)); } /* We rely on caller providing an rx_ring_last > rx_ring_first */ @@ -1073,8 +1213,8 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first, int rx_ring_last) { while (rx_ring_first != rx_ring_last) { - enetc_put_xdp_buff(rx_ring, - &rx_ring->rx_swbd[rx_ring_first]); + enetc_put_rx_buff(rx_ring, + &rx_ring->rx_swbd[rx_ring_first]); enetc_bdr_idx_inc(rx_ring, &rx_ring_first); } rx_ring->stats.xdp_drops++; @@ -1104,8 +1244,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0, xdp_redirect_frm_cnt = 0; struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0}; struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev); - struct enetc_bdr *tx_ring = priv->tx_ring[rx_ring->index]; int rx_frm_cnt = 0, rx_byte_cnt = 0; + struct enetc_bdr *tx_ring; int cleaned_cnt, i; u32 xdp_act; @@ -1143,6 +1283,9 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, xdp_act = bpf_prog_run_xdp(prog, &xdp_buff); switch (xdp_act) { + default: + bpf_warn_invalid_xdp_action(xdp_act); + fallthrough; case XDP_ABORTED: trace_xdp_exception(rx_ring->ndev, prog, xdp_act); fallthrough; @@ -1158,12 +1301,12 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, &i, &cleaned_cnt, ENETC_RXB_DMA_SIZE_XDP); if (unlikely(!skb)) - /* Exit the switch/case, not the loop */ - break; + goto out; napi_gro_receive(napi, skb); break; case XDP_TX: + tx_ring = priv->xdp_tx_ring[rx_ring->index]; xdp_tx_bd_cnt = enetc_rx_swbd_to_xdp_tx_swbd(xdp_tx_arr, rx_ring, orig_i, i); @@ -1175,6 +1318,17 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, tx_ring->stats.xdp_tx += xdp_tx_bd_cnt; rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt; xdp_tx_frm_cnt++; + /* The XDP_TX enqueue was successful, so we + * need to scrub the RX software BDs because + * the ownership of the buffers no longer + * belongs to the RX ring, and we must prevent + * enetc_refill_rx_ring() from reusing + * rx_swbd->page. + */ + while (orig_i != i) { + rx_ring->rx_swbd[orig_i].page = NULL; + enetc_bdr_idx_inc(rx_ring, &orig_i); + } } break; case XDP_REDIRECT: @@ -1195,8 +1349,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, tmp_orig_i = orig_i; while (orig_i != i) { - enetc_put_rx_buff(rx_ring, - &rx_ring->rx_swbd[orig_i]); + enetc_flip_rx_buff(rx_ring, + &rx_ring->rx_swbd[orig_i]); enetc_bdr_idx_inc(rx_ring, &orig_i); } @@ -1207,20 +1361,12 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, xdp_redirect_frm_cnt++; rx_ring->stats.xdp_redirect++; } - - if (unlikely(xdp_redirect_frm_cnt > ENETC_DEFAULT_TX_WORK)) { - xdp_do_flush_map(); - xdp_redirect_frm_cnt = 0; - } - - break; - default: - bpf_warn_invalid_xdp_action(xdp_act); } rx_frm_cnt++; } +out: rx_ring->next_to_clean = i; rx_ring->stats.packets += rx_frm_cnt; @@ -1861,6 +2007,29 @@ static int enetc_phylink_connect(struct net_device *ndev) return 0; } +static void enetc_tx_onestep_tstamp(struct work_struct *work) +{ + struct enetc_ndev_priv *priv; + struct sk_buff *skb; + + priv = container_of(work, struct enetc_ndev_priv, tx_onestep_tstamp); + + netif_tx_lock(priv->ndev); + + clear_bit_unlock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, &priv->flags); + skb = skb_dequeue(&priv->tx_skbs); + if (skb) + enetc_start_xmit(skb, priv->ndev); + + netif_tx_unlock(priv->ndev); +} + +static void enetc_tx_onestep_tstamp_init(struct enetc_ndev_priv *priv) +{ + INIT_WORK(&priv->tx_onestep_tstamp, enetc_tx_onestep_tstamp); + skb_queue_head_init(&priv->tx_skbs); +} + void enetc_start(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -1887,6 +2056,7 @@ void enetc_start(struct net_device *ndev) int enetc_open(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); + int num_stack_tx_queues; int err; err = enetc_setup_irqs(priv); @@ -1905,7 +2075,9 @@ int enetc_open(struct net_device *ndev) if (err) goto err_alloc_rx; - err = netif_set_real_num_tx_queues(ndev, priv->num_tx_rings); + num_stack_tx_queues = enetc_num_stack_tx_queues(priv); + + err = netif_set_real_num_tx_queues(ndev, num_stack_tx_queues); if (err) goto err_set_queues; @@ -1913,6 +2085,7 @@ int enetc_open(struct net_device *ndev) if (err) goto err_set_queues; + enetc_tx_onestep_tstamp_init(priv); enetc_setup_bdrs(priv); enetc_start(ndev); @@ -1977,15 +2150,17 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) struct enetc_ndev_priv *priv = netdev_priv(ndev); struct tc_mqprio_qopt *mqprio = type_data; struct enetc_bdr *tx_ring; + int num_stack_tx_queues; u8 num_tc; int i; + num_stack_tx_queues = enetc_num_stack_tx_queues(priv); mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; num_tc = mqprio->num_tc; if (!num_tc) { netdev_reset_tc(ndev); - netif_set_real_num_tx_queues(ndev, priv->num_tx_rings); + netif_set_real_num_tx_queues(ndev, num_stack_tx_queues); /* Reset all ring priorities to 0 */ for (i = 0; i < priv->num_tx_rings; i++) { @@ -1997,7 +2172,7 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) } /* Check if we have enough BD rings available to accommodate all TCs */ - if (num_tc > priv->num_tx_rings) { + if (num_tc > num_stack_tx_queues) { netdev_err(ndev, "Max %d traffic classes supported\n", priv->num_tx_rings); return -EINVAL; @@ -2211,11 +2386,16 @@ static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) switch (config.tx_type) { case HWTSTAMP_TX_OFF: - priv->active_offloads &= ~ENETC_F_TX_TSTAMP; + priv->active_offloads &= ~ENETC_F_TX_TSTAMP_MASK; break; case HWTSTAMP_TX_ON: + priv->active_offloads &= ~ENETC_F_TX_TSTAMP_MASK; priv->active_offloads |= ENETC_F_TX_TSTAMP; break; + case HWTSTAMP_TX_ONESTEP_SYNC: + priv->active_offloads &= ~ENETC_F_TX_TSTAMP_MASK; + priv->active_offloads |= ENETC_F_TX_ONESTEP_SYNC_TSTAMP; + break; default: return -ERANGE; } @@ -2246,7 +2426,9 @@ static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr) config.flags = 0; - if (priv->active_offloads & ENETC_F_TX_TSTAMP) + if (priv->active_offloads & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) + config.tx_type = HWTSTAMP_TX_ONESTEP_SYNC; + else if (priv->active_offloads & ENETC_F_TX_TSTAMP) config.tx_type = HWTSTAMP_TX_ON; else config.tx_type = HWTSTAMP_TX_OFF; @@ -2278,8 +2460,9 @@ int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) int enetc_alloc_msix(struct enetc_ndev_priv *priv) { struct pci_dev *pdev = priv->si->pdev; - int v_tx_rings; + int first_xdp_tx_ring; int i, n, err, nvec; + int v_tx_rings; nvec = ENETC_BDR_INT_BASE_IDX + priv->bdr_int_num; /* allocate MSIX for both messaging and Rx/Tx interrupts */ @@ -2343,11 +2526,7 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv) int idx; /* default tx ring mapping policy */ - if (priv->bdr_int_num == ENETC_MAX_BDR_INT) - idx = 2 * j + i; /* 2 CPUs */ - else - idx = j + i * v_tx_rings; /* default */ - + idx = priv->bdr_int_num * j + i; __set_bit(idx, &v->tx_rings_map); bdr = &v->tx_ring[j]; bdr->index = idx; @@ -2358,6 +2537,9 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv) } } + first_xdp_tx_ring = priv->num_tx_rings - num_possible_cpus(); + priv->xdp_tx_ring = &priv->tx_ring[first_xdp_tx_ring]; + return 0; fail: diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 05474f46b0d9..08b283347d9c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -30,7 +30,7 @@ struct enetc_tx_swbd { enum dma_data_direction dir; u8 is_dma_page:1; u8 check_wb:1; - u8 do_tstamp:1; + u8 do_twostep_tstamp:1; u8 is_eof:1; u8 is_xdp_tx:1; u8 is_xdp_redirect:1; @@ -79,7 +79,7 @@ struct enetc_xdp_data { }; #define ENETC_RX_RING_DEFAULT_SIZE 2048 -#define ENETC_TX_RING_DEFAULT_SIZE 256 +#define ENETC_TX_RING_DEFAULT_SIZE 2048 #define ENETC_DEFAULT_TX_WORK (ENETC_TX_RING_DEFAULT_SIZE / 2) struct enetc_bdr { @@ -237,6 +237,22 @@ static inline bool enetc_si_is_pf(struct enetc_si *si) return !!(si->hw.port); } +static inline int enetc_pf_to_port(struct pci_dev *pf_pdev) +{ + switch (pf_pdev->devfn) { + case 0: + return 0; + case 1: + return 1; + case 2: + return 2; + case 6: + return 3; + default: + return -1; + } +} + #define ENETC_MAX_NUM_TXQS 8 #define ENETC_INT_NAME_MAX (IFNAMSIZ + 8) @@ -271,12 +287,20 @@ struct psfp_cap { u32 max_psfp_meter; }; +#define ENETC_F_TX_TSTAMP_MASK 0xff /* TODO: more hardware offloads */ enum enetc_active_offloads { - ENETC_F_RX_TSTAMP = BIT(0), - ENETC_F_TX_TSTAMP = BIT(1), - ENETC_F_QBV = BIT(2), - ENETC_F_QCI = BIT(3), + /* 8 bits reserved for TX timestamp types (hwtstamp_tx_types) */ + ENETC_F_TX_TSTAMP = BIT(0), + ENETC_F_TX_ONESTEP_SYNC_TSTAMP = BIT(1), + + ENETC_F_RX_TSTAMP = BIT(8), + ENETC_F_QBV = BIT(9), + ENETC_F_QCI = BIT(10), +}; + +enum enetc_flags_bit { + ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS = 0, }; /* interrupt coalescing modes */ @@ -309,6 +333,7 @@ struct enetc_ndev_priv { u32 speed; /* store speed for compare update pspeed */ + struct enetc_bdr **xdp_tx_ring; struct enetc_bdr *tx_ring[16]; struct enetc_bdr *rx_ring[16]; @@ -321,6 +346,11 @@ struct enetc_ndev_priv { u32 tx_ictt; struct bpf_prog *xdp_prog; + + unsigned long flags; + + struct work_struct tx_onestep_tstamp; + struct sk_buff_head tx_skbs; }; /* Messaging */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 7cc81b453bd7..ebccaf02411c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -671,7 +671,8 @@ static int enetc_get_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = (1 << HWTSTAMP_TX_OFF) | - (1 << HWTSTAMP_TX_ON); + (1 << HWTSTAMP_TX_ON) | + (1 << HWTSTAMP_TX_ONESTEP_SYNC); info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); #else @@ -707,6 +708,22 @@ static int enetc_set_wol(struct net_device *dev, return ret; } +static void enetc_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct enetc_ndev_priv *priv = netdev_priv(dev); + + phylink_ethtool_get_pauseparam(priv->phylink, pause); +} + +static int enetc_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct enetc_ndev_priv *priv = netdev_priv(dev); + + return phylink_ethtool_set_pauseparam(priv->phylink, pause); +} + static int enetc_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { @@ -753,6 +770,8 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = { .get_ts_info = enetc_get_ts_info, .get_wol = enetc_get_wol, .set_wol = enetc_set_wol, + .get_pauseparam = enetc_get_pauseparam, + .set_pauseparam = enetc_set_pauseparam, }; static const struct ethtool_ops enetc_vf_ethtool_ops = { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 00938f7960a4..0f5f081a5baf 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -109,6 +109,7 @@ enum enetc_bdr_type {TX, RX}; /* RX BDR reg offsets */ #define ENETC_RBMR 0 #define ENETC_RBMR_BDS BIT(2) +#define ENETC_RBMR_CM BIT(4) #define ENETC_RBMR_VTE BIT(5) #define ENETC_RBMR_EN BIT(31) #define ENETC_RBSR 0x4 @@ -180,6 +181,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PSIVLANR(n) (0x0240 + (n) * 4) /* n = SI index */ #define ENETC_PSIVLAN_EN BIT(31) #define ENETC_PSIVLAN_SET_QOS(val) ((u32)(val) << 12) +#define ENETC_PPAUONTR 0x0410 +#define ENETC_PPAUOFFTR 0x0414 #define ENETC_PTXMBAR 0x0608 #define ENETC_PCAPR0 0x0900 #define ENETC_PCAPR0_RXBDR(val) ((val) >> 24) @@ -227,6 +230,7 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM0_TX_EN BIT(0) #define ENETC_PM0_RX_EN BIT(1) #define ENETC_PM0_PROMISC BIT(4) +#define ENETC_PM0_PAUSE_IGN BIT(8) #define ENETC_PM0_CMD_XGLP BIT(10) #define ENETC_PM0_CMD_TXP BIT(11) #define ENETC_PM0_CMD_PHY_TX_EN BIT(15) @@ -239,6 +243,17 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM_IMDIO_BASE 0x8030 +#define ENETC_PM0_PAUSE_QUANTA 0x8054 +#define ENETC_PM0_PAUSE_THRESH 0x8064 +#define ENETC_PM1_PAUSE_QUANTA 0x9054 +#define ENETC_PM1_PAUSE_THRESH 0x9064 + +#define ENETC_PM0_SINGLE_STEP 0x80c0 +#define ENETC_PM1_SINGLE_STEP 0x90c0 +#define ENETC_PM0_SINGLE_STEP_CH BIT(7) +#define ENETC_PM0_SINGLE_STEP_EN BIT(31) +#define ENETC_SET_SINGLE_STEP_OFFSET(v) (((v) & 0xff) << 8) + #define ENETC_PM0_IF_MODE 0x8300 #define ENETC_PM0_IFM_RG BIT(2) #define ENETC_PM0_IFM_RLP (BIT(5) | BIT(11)) @@ -548,6 +563,7 @@ static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd) /* Extension flags */ #define ENETC_TXBD_E_FLAGS_VLAN_INS BIT(0) +#define ENETC_TXBD_E_FLAGS_ONE_STEP_PTP BIT(1) #define ENETC_TXBD_E_FLAGS_TWO_STEP_PTP BIT(2) union enetc_rx_bd { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c new file mode 100644 index 000000000000..8b356c485507 --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2021 NXP Semiconductors + * + * The Integrated Endpoint Register Block (IERB) is configured by pre-boot + * software and is supposed to be to ENETC what a NVRAM is to a 'real' PCIe + * card. Upon FLR, values from the IERB are transferred to the ENETC PFs, and + * are read-only in the PF memory space. + * + * This driver fixes up the power-on reset values for the ENETC shared FIFO, + * such that the TX and RX allocations are sufficient for jumbo frames, and + * that intelligent FIFO dropping is enabled before the internal data + * structures are corrupted. + * + * Even though not all ports might be used on a given board, we are not + * concerned with partitioning the FIFO, because the default values configure + * no strict reservations, so the entire FIFO can be used by the RX of a single + * port, or the TX of a single port. + */ + +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include "enetc.h" +#include "enetc_ierb.h" + +/* IERB registers */ +#define ENETC_IERB_TXMBAR(port) (((port) * 0x100) + 0x8080) +#define ENETC_IERB_RXMBER(port) (((port) * 0x100) + 0x8090) +#define ENETC_IERB_RXMBLR(port) (((port) * 0x100) + 0x8094) +#define ENETC_IERB_RXBCR(port) (((port) * 0x100) + 0x80a0) +#define ENETC_IERB_TXBCR(port) (((port) * 0x100) + 0x80a8) +#define ENETC_IERB_FMBDTR 0xa000 + +#define ENETC_RESERVED_FOR_ICM 1024 + +struct enetc_ierb { + void __iomem *regs; +}; + +static void enetc_ierb_write(struct enetc_ierb *ierb, u32 offset, u32 val) +{ + iowrite32(val, ierb->regs + offset); +} + +int enetc_ierb_register_pf(struct platform_device *pdev, + struct pci_dev *pf_pdev) +{ + struct enetc_ierb *ierb = platform_get_drvdata(pdev); + int port = enetc_pf_to_port(pf_pdev); + u16 tx_credit, rx_credit, tx_alloc; + + if (port < 0) + return -ENODEV; + + if (!ierb) + return -EPROBE_DEFER; + + /* By default, it is recommended to set the Host Transfer Agent + * per port transmit byte credit to "1000 + max_frame_size/2". + * The power-on reset value (1800 bytes) is rounded up to the nearest + * 100 assuming a maximum frame size of 1536 bytes. + */ + tx_credit = roundup(1000 + ENETC_MAC_MAXFRM_SIZE / 2, 100); + + /* Internal memory allocated for transmit buffering is guaranteed but + * not reserved; i.e. if the total transmit allocation is not used, + * then the unused portion is not left idle, it can be used for receive + * buffering but it will be reclaimed, if required, from receive by + * intelligently dropping already stored receive frames in the internal + * memory to ensure that the transmit allocation is respected. + * + * PaTXMBAR must be set to a value larger than + * PaTXBCR + 2 * max_frame_size + 32 + * if frame preemption is not enabled, or to + * 2 * PaTXBCR + 2 * p_max_frame_size (pMAC maximum frame size) + + * 2 * np_max_frame_size (eMAC maximum frame size) + 64 + * if frame preemption is enabled. + */ + tx_alloc = roundup(2 * tx_credit + 4 * ENETC_MAC_MAXFRM_SIZE + 64, 16); + + /* Initial credits, in units of 8 bytes, to the Ingress Congestion + * Manager for the maximum amount of bytes the port is allocated for + * pending traffic. + * It is recommended to set the initial credits to 2 times the maximum + * frame size (2 frames of maximum size). + */ + rx_credit = DIV_ROUND_UP(ENETC_MAC_MAXFRM_SIZE * 2, 8); + + enetc_ierb_write(ierb, ENETC_IERB_TXBCR(port), tx_credit); + enetc_ierb_write(ierb, ENETC_IERB_TXMBAR(port), tx_alloc); + enetc_ierb_write(ierb, ENETC_IERB_RXBCR(port), rx_credit); + + return 0; +} +EXPORT_SYMBOL(enetc_ierb_register_pf); + +static int enetc_ierb_probe(struct platform_device *pdev) +{ + struct enetc_ierb *ierb; + struct resource *res; + void __iomem *regs; + + ierb = devm_kzalloc(&pdev->dev, sizeof(*ierb), GFP_KERNEL); + if (!ierb) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + ierb->regs = regs; + + /* Free buffer depletion threshold in bytes. + * This sets the minimum amount of free buffer memory that should be + * maintained in the datapath sub system, and when the amount of free + * buffer memory falls below this threshold, a depletion indication is + * asserted, which may trigger "intelligent drop" frame releases from + * the ingress queues in the ICM. + * It is recommended to set the free buffer depletion threshold to 1024 + * bytes, since the ICM needs some FIFO memory for its own use. + */ + enetc_ierb_write(ierb, ENETC_IERB_FMBDTR, ENETC_RESERVED_FOR_ICM); + + platform_set_drvdata(pdev, ierb); + + return 0; +} + +static int enetc_ierb_remove(struct platform_device *pdev) +{ + return 0; +} + +static const struct of_device_id enetc_ierb_match[] = { + { .compatible = "fsl,ls1028a-enetc-ierb", }, + {}, +}; +MODULE_DEVICE_TABLE(of, enetc_ierb_match); + +static struct platform_driver enetc_ierb_driver = { + .driver = { + .name = "fsl-enetc-ierb", + .of_match_table = enetc_ierb_match, + }, + .probe = enetc_ierb_probe, + .remove = enetc_ierb_remove, +}; + +module_platform_driver(enetc_ierb_driver); + +MODULE_DESCRIPTION("NXP ENETC IERB"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h new file mode 100644 index 000000000000..b3b774e0998a --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2021 NXP Semiconductors */ + +#include <linux/pci.h> +#include <linux/platform_device.h> + +#if IS_ENABLED(CONFIG_FSL_ENETC_IERB) + +int enetc_ierb_register_pf(struct platform_device *pdev, + struct pci_dev *pf_pdev); + +#else + +static inline int enetc_ierb_register_pf(struct platform_device *pdev, + struct pci_dev *pf_pdev) +{ + return -EOPNOTSUPP; +} + +#endif diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index f61fedf462e5..31274325159a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -4,8 +4,10 @@ #include <linux/mdio.h> #include <linux/module.h> #include <linux/fsl/enetc_mdio.h> +#include <linux/of_platform.h> #include <linux/of_mdio.h> #include <linux/of_net.h> +#include "enetc_ierb.h" #include "enetc_pf.h" #define ENETC_DRV_NAME_STR "ENETC PF driver" @@ -390,23 +392,54 @@ static int enetc_pf_set_vf_spoofchk(struct net_device *ndev, int vf, bool en) return 0; } -static void enetc_port_setup_primary_mac_address(struct enetc_si *si) +static int enetc_setup_mac_address(struct device_node *np, struct enetc_pf *pf, + int si) { - unsigned char mac_addr[MAX_ADDR_LEN]; - struct enetc_pf *pf = enetc_si_priv(si); - struct enetc_hw *hw = &si->hw; - int i; + struct device *dev = &pf->si->pdev->dev; + struct enetc_hw *hw = &pf->si->hw; + u8 mac_addr[ETH_ALEN] = { 0 }; + int err; - /* check MAC addresses for PF and all VFs, if any is 0 set it ro rand */ - for (i = 0; i < pf->total_vfs + 1; i++) { - enetc_pf_get_primary_mac_addr(hw, i, mac_addr); - if (!is_zero_ether_addr(mac_addr)) - continue; + /* (1) try to get the MAC address from the device tree */ + if (np) { + err = of_get_mac_address(np, mac_addr); + if (err == -EPROBE_DEFER) + return err; + } + + /* (2) bootloader supplied MAC address */ + if (is_zero_ether_addr(mac_addr)) + enetc_pf_get_primary_mac_addr(hw, si, mac_addr); + + /* (3) choose a random one */ + if (is_zero_ether_addr(mac_addr)) { eth_random_addr(mac_addr); - dev_info(&si->pdev->dev, "no MAC address specified for SI%d, using %pM\n", - i, mac_addr); - enetc_pf_set_primary_mac_addr(hw, i, mac_addr); + dev_info(dev, "no MAC address specified for SI%d, using %pM\n", + si, mac_addr); + } + + enetc_pf_set_primary_mac_addr(hw, si, mac_addr); + + return 0; +} + +static int enetc_setup_mac_addresses(struct device_node *np, + struct enetc_pf *pf) +{ + int err, i; + + /* The PF might take its MAC from the device tree */ + err = enetc_setup_mac_address(np, pf, 0); + if (err) + return err; + + for (i = 0; i < pf->total_vfs; i++) { + err = enetc_setup_mac_address(NULL, pf, i + 1); + if (err) + return err; } + + return 0; } static void enetc_port_assign_rfs_entries(struct enetc_si *si) @@ -487,7 +520,6 @@ static void enetc_configure_port_mac(struct enetc_hw *hw) ENETC_SET_MAXFRM(ENETC_RX_MAXFRM_SIZE)); enetc_port_wr(hw, ENETC_PTCMSDUR(0), ENETC_MAC_MAXFRM_SIZE); - enetc_port_wr(hw, ENETC_PTXMBAR, 2 * ENETC_MAC_MAXFRM_SIZE); enetc_port_wr(hw, ENETC_PM0_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN | ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC); @@ -562,9 +594,6 @@ static void enetc_configure_port(struct enetc_pf *pf) /* split up RFS entries */ enetc_port_assign_rfs_entries(pf->si); - /* fix-up primary MAC addresses, if not set already */ - enetc_port_setup_primary_mac_address(pf->si); - /* enforce VLAN promisc mode for all SIs */ pf->vlan_promisc_simap = ENETC_VLAN_PROMISC_MAP_ALL; enetc_set_vlan_promisc(hw, pf->vlan_promisc_simap); @@ -985,7 +1014,12 @@ static void enetc_pl_mac_link_up(struct phylink_config *config, int duplex, bool tx_pause, bool rx_pause) { struct enetc_pf *pf = phylink_to_enetc_pf(config); + u32 pause_off_thresh = 0, pause_on_thresh = 0; + u32 init_quanta = 0, refresh_quanta = 0; + struct enetc_hw *hw = &pf->si->hw; struct enetc_ndev_priv *priv; + u32 rbmr, cmd_cfg; + int idx; priv = netdev_priv(pf->si->ndev); if (priv->active_offloads & ENETC_F_QBV) @@ -993,9 +1027,60 @@ static void enetc_pl_mac_link_up(struct phylink_config *config, if (!phylink_autoneg_inband(mode) && phy_interface_mode_is_rgmii(interface)) - enetc_force_rgmii_mac(&pf->si->hw, speed, duplex); + enetc_force_rgmii_mac(hw, speed, duplex); + + /* Flow control */ + for (idx = 0; idx < priv->num_rx_rings; idx++) { + rbmr = enetc_rxbdr_rd(hw, idx, ENETC_RBMR); + + if (tx_pause) + rbmr |= ENETC_RBMR_CM; + else + rbmr &= ~ENETC_RBMR_CM; + + enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr); + } + + if (tx_pause) { + /* When the port first enters congestion, send a PAUSE request + * with the maximum number of quanta. When the port exits + * congestion, it will automatically send a PAUSE frame with + * zero quanta. + */ + init_quanta = 0xffff; + + /* Also, set up the refresh timer to send follow-up PAUSE + * frames at half the quanta value, in case the congestion + * condition persists. + */ + refresh_quanta = 0xffff / 2; + + /* Start emitting PAUSE frames when 3 large frames (or more + * smaller frames) have accumulated in the FIFO waiting to be + * DMAed to the RX ring. + */ + pause_on_thresh = 3 * ENETC_MAC_MAXFRM_SIZE; + pause_off_thresh = 1 * ENETC_MAC_MAXFRM_SIZE; + } + + enetc_port_wr(hw, ENETC_PM0_PAUSE_QUANTA, init_quanta); + enetc_port_wr(hw, ENETC_PM1_PAUSE_QUANTA, init_quanta); + enetc_port_wr(hw, ENETC_PM0_PAUSE_THRESH, refresh_quanta); + enetc_port_wr(hw, ENETC_PM1_PAUSE_THRESH, refresh_quanta); + enetc_port_wr(hw, ENETC_PPAUONTR, pause_on_thresh); + enetc_port_wr(hw, ENETC_PPAUOFFTR, pause_off_thresh); - enetc_mac_enable(&pf->si->hw, true); + cmd_cfg = enetc_port_rd(hw, ENETC_PM0_CMD_CFG); + + if (rx_pause) + cmd_cfg &= ~ENETC_PM0_PAUSE_IGN; + else + cmd_cfg |= ENETC_PM0_PAUSE_IGN; + + enetc_port_wr(hw, ENETC_PM0_CMD_CFG, cmd_cfg); + enetc_port_wr(hw, ENETC_PM1_CMD_CFG, cmd_cfg); + + enetc_mac_enable(hw, true); } static void enetc_pl_mac_link_down(struct phylink_config *config, @@ -1087,6 +1172,30 @@ static int enetc_init_port_rss_memory(struct enetc_si *si) return err; } +static int enetc_pf_register_with_ierb(struct pci_dev *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct platform_device *ierb_pdev; + struct device_node *ierb_node; + + /* Don't register with the IERB if the PF itself is disabled */ + if (!node || !of_device_is_available(node)) + return 0; + + ierb_node = of_find_compatible_node(NULL, NULL, + "fsl,ls1028a-enetc-ierb"); + if (!ierb_node || !of_device_is_available(ierb_node)) + return -ENODEV; + + ierb_pdev = of_find_device_by_node(ierb_node); + of_node_put(ierb_node); + + if (!ierb_pdev) + return -EPROBE_DEFER; + + return enetc_ierb_register_pf(ierb_pdev, pdev); +} + static int enetc_pf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1097,6 +1206,14 @@ static int enetc_pf_probe(struct pci_dev *pdev, struct enetc_pf *pf; int err; + err = enetc_pf_register_with_ierb(pdev); + if (err == -EPROBE_DEFER) + return err; + if (err) + dev_warn(&pdev->dev, + "Could not register with IERB driver: %pe, please update the device tree\n", + ERR_PTR(err)); + err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf)); if (err) { dev_err(&pdev->dev, "PCI probing failed\n"); @@ -1137,6 +1254,10 @@ static int enetc_pf_probe(struct pci_dev *pdev, pf->si = si; pf->total_vfs = pci_sriov_get_totalvfs(pdev); + err = enetc_setup_mac_addresses(node, pf); + if (err) + goto err_setup_mac_addresses; + enetc_configure_port(pf); enetc_get_si_caps(si); @@ -1204,6 +1325,7 @@ err_alloc_netdev: err_init_port_rss: err_init_port_rfs: err_device_disabled: +err_setup_mac_addresses: enetc_teardown_cbdr(&si->cbd_ring); err_setup_cbdr: err_map_pf_space: diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index cb7fa4bceaf2..af699f2ad095 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -455,11 +455,6 @@ static struct enetc_psfp epsfp = { static LIST_HEAD(enetc_block_cb_list); -static inline int enetc_get_port(struct enetc_ndev_priv *priv) -{ - return priv->si->pdev->devfn & 0x7; -} - /* Stream Identity Entry Set Descriptor */ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, struct enetc_streamid *sid, @@ -504,7 +499,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, si_conf = &cbd.sid_set; /* Only one port supported for one entry, set itself */ - si_conf->iports = cpu_to_le32(1 << enetc_get_port(priv)); + si_conf->iports = cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev)); si_conf->id_type = 1; si_conf->oui[2] = 0x0; si_conf->oui[1] = 0x80; @@ -529,7 +524,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, si_conf->en = 0x80; si_conf->stream_handle = cpu_to_le32(sid->handle); - si_conf->iports = cpu_to_le32(1 << enetc_get_port(priv)); + si_conf->iports = cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev)); si_conf->id_type = sid->filtertype; si_conf->oui[2] = 0x0; si_conf->oui[1] = 0x80; @@ -591,7 +586,8 @@ static int enetc_streamfilter_hw_set(struct enetc_ndev_priv *priv, } sfi_config->sg_inst_table_index = cpu_to_le16(sfi->gate_id); - sfi_config->input_ports = cpu_to_le32(1 << enetc_get_port(priv)); + sfi_config->input_ports = + cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev)); /* The priority value which may be matched against the * frame’s priority value to determine a match for this entry. @@ -1562,10 +1558,10 @@ int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data) switch (f->command) { case FLOW_BLOCK_BIND: - set_bit(enetc_get_port(priv), &epsfp.dev_bitmap); + set_bit(enetc_pf_to_port(priv->si->pdev), &epsfp.dev_bitmap); break; case FLOW_BLOCK_UNBIND: - clear_bit(enetc_get_port(priv), &epsfp.dev_bitmap); + clear_bit(enetc_pf_to_port(priv->si->pdev), &epsfp.dev_bitmap); if (!epsfp.dev_bitmap) clean_psfp_all(); break; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3db882322b2b..f2065f9d02e6 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -38,6 +38,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <net/ip.h> +#include <net/selftests.h> #include <net/tso.h> #include <linux/tcp.h> #include <linux/udp.h> @@ -1665,6 +1666,7 @@ static void fec_get_mac(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); unsigned char *iap, tmpaddr[ETH_ALEN]; + int ret; /* * try to get mac address in following order: @@ -1680,9 +1682,9 @@ static void fec_get_mac(struct net_device *ndev) if (!is_valid_ether_addr(iap)) { struct device_node *np = fep->pdev->dev.of_node; if (np) { - const char *mac = of_get_mac_address(np); - if (!IS_ERR(mac)) - iap = (unsigned char *) mac; + ret = of_get_mac_address(np, tmpaddr); + if (!ret) + iap = tmpaddr; } } @@ -2048,6 +2050,8 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; + phy_dev->mac_managed_pm = 1; + phy_attached_info(phy_dev); return 0; @@ -2479,6 +2483,9 @@ static void fec_enet_get_strings(struct net_device *netdev, memcpy(data + i * ETH_GSTRING_LEN, fec_stats[i].name, ETH_GSTRING_LEN); break; + case ETH_SS_TEST: + net_selftest_get_strings(data); + break; } } @@ -2487,6 +2494,8 @@ static int fec_enet_get_sset_count(struct net_device *dev, int sset) switch (sset) { case ETH_SS_STATS: return ARRAY_SIZE(fec_stats); + case ETH_SS_TEST: + return net_selftest_get_count(); default: return -EOPNOTSUPP; } @@ -2738,6 +2747,7 @@ static const struct ethtool_ops fec_enet_ethtool_ops = { .set_wol = fec_enet_set_wol, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .self_test = net_selftest, }; static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) @@ -3864,6 +3874,7 @@ static int __maybe_unused fec_resume(struct device *dev) netif_device_attach(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); + phy_init_hw(ndev->phydev); phy_start(ndev->phydev); } rtnl_unlock(); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index b3bad429e03b..02c47658a215 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -813,7 +813,6 @@ static int mpc52xx_fec_probe(struct platform_device *op) const u32 *prop; int prop_size; struct device_node *np = op->dev.of_node; - const char *mac_addr; phys_addr_t rx_fifo; phys_addr_t tx_fifo; @@ -891,10 +890,8 @@ static int mpc52xx_fec_probe(struct platform_device *op) * * First try to read MAC address from DT */ - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(ndev->dev_addr, mac_addr); - } else { + rv = of_get_mac_address(np, ndev->dev_addr); + if (rv) { struct mpc52xx_fec __iomem *fec = priv->fec; /* diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 901749a7a318..46ecb42f2ef8 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -605,7 +605,6 @@ static int mac_probe(struct platform_device *_of_dev) struct platform_device *of_dev; struct resource res; struct mac_priv_s *priv; - const u8 *mac_addr; u32 val; u8 fman_id; phy_interface_t phy_if; @@ -723,11 +722,9 @@ static int mac_probe(struct platform_device *_of_dev) priv->cell_index = (u8)val; /* Get the MAC address */ - mac_addr = of_get_mac_address(mac_node); - if (IS_ERR(mac_addr)) + err = of_get_mac_address(mac_node, mac_dev->addr); + if (err) dev_warn(dev, "of_get_mac_address(%pOF) failed\n", mac_node); - else - ether_addr_copy(mac_dev->addr, mac_addr); /* Get the port handles */ nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL); @@ -853,7 +850,7 @@ static int mac_probe(struct platform_device *_of_dev) if (err < 0) dev_err(dev, "fman_set_mac_active_pause() = %d\n", err); - if (!IS_ERR(mac_addr)) + if (!is_zero_ether_addr(mac_dev->addr)) dev_info(dev, "FMan MAC address: %pM\n", mac_dev->addr); priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev); diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 78e008b81374..6ee325ad35c5 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -918,7 +918,6 @@ static int fs_enet_probe(struct platform_device *ofdev) const u32 *data; struct clk *clk; int err; - const u8 *mac_addr; const char *phy_connection_type; int privsize, len, ret = -ENODEV; @@ -1006,9 +1005,7 @@ static int fs_enet_probe(struct platform_device *ofdev) spin_lock_init(&fep->lock); spin_lock_init(&fep->tx_lock); - mac_addr = of_get_mac_address(ofdev->dev.of_node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); + of_get_mac_address(ofdev->dev.of_node, ndev->dev_addr); ret = fep->ops->allocate_bd(ndev); if (ret) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 1cf8ef717453..f2945abdb041 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -175,10 +175,7 @@ static void gfar_mac_rx_config(struct gfar_private *priv) if (priv->rx_filer_enable) { rctrl |= RCTRL_FILREN | RCTRL_PRSDEP_INIT; /* Program the RIR0 reg with the required distribution */ - if (priv->poll_mode == GFAR_SQ_POLLING) - gfar_write(®s->rir0, DEFAULT_2RXQ_RIR0); - else /* GFAR_MQ_POLLING */ - gfar_write(®s->rir0, DEFAULT_8RXQ_RIR0); + gfar_write(®s->rir0, DEFAULT_2RXQ_RIR0); } /* Restore PROMISC mode */ @@ -363,7 +360,11 @@ static void gfar_set_mac_for_addr(struct net_device *dev, int num, static int gfar_set_mac_addr(struct net_device *dev, void *p) { - eth_mac_addr(dev, p); + int ret; + + ret = eth_mac_addr(dev, p); + if (ret) + return ret; gfar_set_mac_for_addr(dev, 0, dev->dev_addr); @@ -517,29 +518,9 @@ static int gfar_parse_group(struct device_node *np, grp->priv = priv; spin_lock_init(&grp->grplock); if (priv->mode == MQ_MG_MODE) { - u32 rxq_mask, txq_mask; - int ret; - + /* One Q per interrupt group: Q0 to G0, Q1 to G1 */ grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps); grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps); - - ret = of_property_read_u32(np, "fsl,rx-bit-map", &rxq_mask); - if (!ret) { - grp->rx_bit_map = rxq_mask ? - rxq_mask : (DEFAULT_MAPPING >> priv->num_grps); - } - - ret = of_property_read_u32(np, "fsl,tx-bit-map", &txq_mask); - if (!ret) { - grp->tx_bit_map = txq_mask ? - txq_mask : (DEFAULT_MAPPING >> priv->num_grps); - } - - if (priv->poll_mode == GFAR_SQ_POLLING) { - /* One Q per interrupt group: Q0 to G0, Q1 to G1 */ - grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps); - grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps); - } } else { grp->rx_bit_map = 0xFF; grp->tx_bit_map = 0xFF; @@ -636,7 +617,6 @@ static phy_interface_t gfar_get_interface(struct net_device *dev) static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) { const char *model; - const void *mac_addr; int err = 0, i; phy_interface_t interface; struct net_device *dev = NULL; @@ -646,18 +626,15 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) u32 stash_len = 0; u32 stash_idx = 0; unsigned int num_tx_qs, num_rx_qs; - unsigned short mode, poll_mode; + unsigned short mode; if (!np) return -ENODEV; - if (of_device_is_compatible(np, "fsl,etsec2")) { + if (of_device_is_compatible(np, "fsl,etsec2")) mode = MQ_MG_MODE; - poll_mode = GFAR_SQ_POLLING; - } else { + else mode = SQ_SG_MODE; - poll_mode = GFAR_SQ_POLLING; - } if (mode == SQ_SG_MODE) { num_tx_qs = 1; @@ -673,22 +650,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) return -EINVAL; } - if (poll_mode == GFAR_SQ_POLLING) { - num_tx_qs = num_grps; /* one txq per int group */ - num_rx_qs = num_grps; /* one rxq per int group */ - } else { /* GFAR_MQ_POLLING */ - u32 tx_queues, rx_queues; - int ret; - - /* parse the num of HW tx and rx queues */ - ret = of_property_read_u32(np, "fsl,num_tx_queues", - &tx_queues); - num_tx_qs = ret ? 1 : tx_queues; - - ret = of_property_read_u32(np, "fsl,num_rx_queues", - &rx_queues); - num_rx_qs = ret ? 1 : rx_queues; - } + num_tx_qs = num_grps; /* one txq per int group */ + num_rx_qs = num_grps; /* one rxq per int group */ } if (num_tx_qs > MAX_TX_QS) { @@ -714,7 +677,6 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) priv->ndev = dev; priv->mode = mode; - priv->poll_mode = poll_mode; priv->num_tx_queues = num_tx_qs; netif_set_real_num_rx_queues(dev, num_rx_qs); @@ -778,11 +740,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) if (stash_len || stash_idx) priv->device_flags |= FSL_GIANFAR_DEV_HAS_BUF_STASHING; - mac_addr = of_get_mac_address(np); - - if (!IS_ERR(mac_addr)) { - ether_addr_copy(dev->dev_addr, mac_addr); - } else { + err = of_get_mac_address(np, dev->dev_addr); + if (err) { eth_hw_addr_random(dev); dev_info(&ofdev->dev, "Using random MAC address: %pM\n", dev->dev_addr); } @@ -2691,106 +2650,6 @@ static int gfar_poll_tx_sq(struct napi_struct *napi, int budget) return 0; } -static int gfar_poll_rx(struct napi_struct *napi, int budget) -{ - struct gfar_priv_grp *gfargrp = - container_of(napi, struct gfar_priv_grp, napi_rx); - struct gfar_private *priv = gfargrp->priv; - struct gfar __iomem *regs = gfargrp->regs; - struct gfar_priv_rx_q *rx_queue = NULL; - int work_done = 0, work_done_per_q = 0; - int i, budget_per_q = 0; - unsigned long rstat_rxf; - int num_act_queues; - - /* Clear IEVENT, so interrupts aren't called again - * because of the packets that have already arrived - */ - gfar_write(®s->ievent, IEVENT_RX_MASK); - - rstat_rxf = gfar_read(®s->rstat) & RSTAT_RXF_MASK; - - num_act_queues = bitmap_weight(&rstat_rxf, MAX_RX_QS); - if (num_act_queues) - budget_per_q = budget/num_act_queues; - - for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) { - /* skip queue if not active */ - if (!(rstat_rxf & (RSTAT_CLEAR_RXF0 >> i))) - continue; - - rx_queue = priv->rx_queue[i]; - work_done_per_q = - gfar_clean_rx_ring(rx_queue, budget_per_q); - work_done += work_done_per_q; - - /* finished processing this queue */ - if (work_done_per_q < budget_per_q) { - /* clear active queue hw indication */ - gfar_write(®s->rstat, - RSTAT_CLEAR_RXF0 >> i); - num_act_queues--; - - if (!num_act_queues) - break; - } - } - - if (!num_act_queues) { - u32 imask; - napi_complete_done(napi, work_done); - - /* Clear the halt bit in RSTAT */ - gfar_write(®s->rstat, gfargrp->rstat); - - spin_lock_irq(&gfargrp->grplock); - imask = gfar_read(®s->imask); - imask |= IMASK_RX_DEFAULT; - gfar_write(®s->imask, imask); - spin_unlock_irq(&gfargrp->grplock); - } - - return work_done; -} - -static int gfar_poll_tx(struct napi_struct *napi, int budget) -{ - struct gfar_priv_grp *gfargrp = - container_of(napi, struct gfar_priv_grp, napi_tx); - struct gfar_private *priv = gfargrp->priv; - struct gfar __iomem *regs = gfargrp->regs; - struct gfar_priv_tx_q *tx_queue = NULL; - int has_tx_work = 0; - int i; - - /* Clear IEVENT, so interrupts aren't called again - * because of the packets that have already arrived - */ - gfar_write(®s->ievent, IEVENT_TX_MASK); - - for_each_set_bit(i, &gfargrp->tx_bit_map, priv->num_tx_queues) { - tx_queue = priv->tx_queue[i]; - /* run Tx cleanup to completion */ - if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx]) { - gfar_clean_tx_ring(tx_queue); - has_tx_work = 1; - } - } - - if (!has_tx_work) { - u32 imask; - napi_complete(napi); - - spin_lock_irq(&gfargrp->grplock); - imask = gfar_read(®s->imask); - imask |= IMASK_TX_DEFAULT; - gfar_write(®s->imask, imask); - spin_unlock_irq(&gfargrp->grplock); - } - - return 0; -} - /* GFAR error interrupt handler */ static irqreturn_t gfar_error(int irq, void *grp_id) { @@ -3348,17 +3207,10 @@ static int gfar_probe(struct platform_device *ofdev) /* Register for napi ...We are registering NAPI for each grp */ for (i = 0; i < priv->num_grps; i++) { - if (priv->poll_mode == GFAR_SQ_POLLING) { - netif_napi_add(dev, &priv->gfargrp[i].napi_rx, - gfar_poll_rx_sq, GFAR_DEV_WEIGHT); - netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, - gfar_poll_tx_sq, 2); - } else { - netif_napi_add(dev, &priv->gfargrp[i].napi_rx, - gfar_poll_rx, GFAR_DEV_WEIGHT); - netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, - gfar_poll_tx, 2); - } + netif_napi_add(dev, &priv->gfargrp[i].napi_rx, + gfar_poll_rx_sq, GFAR_DEV_WEIGHT); + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, + gfar_poll_tx_sq, 2); } if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) { diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index 8ced783f5302..5ea47df93e5e 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -909,22 +909,6 @@ enum { MQ_MG_MODE }; -/* GFAR_SQ_POLLING: Single Queue NAPI polling mode - * The driver supports a single pair of RX/Tx queues - * per interrupt group (Rx/Tx int line). MQ_MG mode - * devices have 2 interrupt groups, so the device will - * have a total of 2 Tx and 2 Rx queues in this case. - * GFAR_MQ_POLLING: Multi Queue NAPI polling mode - * The driver supports all the 8 Rx and Tx HW queues - * each queue mapped by the Device Tree to one of - * the 2 interrupt groups. This mode implies significant - * processing overhead (CPU and controller level). - */ -enum gfar_poll_mode { - GFAR_SQ_POLLING = 0, - GFAR_MQ_POLLING -}; - /* * Per TX queue stats */ @@ -1105,7 +1089,6 @@ struct gfar_private { unsigned long state; unsigned short mode; - unsigned short poll_mode; unsigned int num_tx_queues; unsigned int num_rx_queues; unsigned int num_grps; diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index ef4e2febeb5b..e0936510fa34 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3562,7 +3562,6 @@ static int ucc_geth_probe(struct platform_device* ofdev) struct resource res; int err, ucc_num, max_speed = 0; const unsigned int *prop; - const void *mac_addr; phy_interface_t phy_interface; static const int enet_to_speed[] = { SPEED_10, SPEED_10, SPEED_10, @@ -3733,9 +3732,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) goto err_free_netdev; } - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(dev->dev_addr, mac_addr); + of_get_mac_address(np, dev->dev_addr); ugeth->ug_info = ug_info; ugeth->dev = device; diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 57c3bc4f7089..3c4db4a6b431 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -772,7 +772,6 @@ static int hisi_femac_drv_probe(struct platform_device *pdev) struct net_device *ndev; struct hisi_femac_priv *priv; struct phy_device *phy; - const char *mac_addr; int ret; ndev = alloc_etherdev(sizeof(*priv)); @@ -842,10 +841,8 @@ static int hisi_femac_drv_probe(struct platform_device *pdev) (unsigned long)phy->phy_id, phy_modes(phy->interface)); - mac_addr = of_get_mac_address(node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - if (!is_valid_ether_addr(ndev->dev_addr)) { + ret = of_get_mac_address(node, ndev->dev_addr); + if (ret) { eth_hw_addr_random(ndev); dev_warn(dev, "using random MAC address %pM\n", ndev->dev_addr); diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 8b2bf85039f1..c1aae0fca5e9 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -1098,7 +1098,6 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) struct net_device *ndev; struct hix5hd2_priv *priv; struct mii_bus *bus; - const char *mac_addr; int ret; ndev = alloc_etherdev(sizeof(struct hix5hd2_priv)); @@ -1220,10 +1219,8 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) goto out_phy_node; } - mac_addr = of_get_mac_address(node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - if (!is_valid_ether_addr(ndev->dev_addr)) { + ret = of_get_mac_address(node, ndev->dev_addr); + if (ret) { eth_hw_addr_random(ndev); netdev_warn(ndev, "using random MAC address %pM\n", ndev->dev_addr); diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 33defa4c180a..a2c17af57fde 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -172,4 +172,7 @@ struct hclgevf_mbx_arq_ring { (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) #define hclge_mbx_head_ptr_move_arq(arq) \ (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) + +/* PF immediately push link status to VFs when link status changed */ +#define HCLGE_MBX_PUSH_LINK_STATUS_EN BIT(0) #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index a234116ba0e5..1d2189047781 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -474,8 +474,9 @@ struct hnae3_ae_dev { struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); void (*uninit_ae_dev)(struct hnae3_ae_dev *ae_dev); - void (*flr_prepare)(struct hnae3_ae_dev *ae_dev); - void (*flr_done)(struct hnae3_ae_dev *ae_dev); + void (*reset_prepare)(struct hnae3_ae_dev *ae_dev, + enum hnae3_reset_type rst_type); + void (*reset_done)(struct hnae3_ae_dev *ae_dev); int (*init_client_instance)(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev); void (*uninit_client_instance)(struct hnae3_client *client, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 076bfb76bdb9..c21dd11baed9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2365,6 +2365,32 @@ static void hns3_shutdown(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D3hot); } +static int __maybe_unused hns3_suspend(struct device *dev) +{ + struct hnae3_ae_dev *ae_dev = dev_get_drvdata(dev); + + if (ae_dev && hns3_is_phys_func(ae_dev->pdev)) { + dev_info(dev, "Begin to suspend.\n"); + if (ae_dev->ops && ae_dev->ops->reset_prepare) + ae_dev->ops->reset_prepare(ae_dev, HNAE3_FUNC_RESET); + } + + return 0; +} + +static int __maybe_unused hns3_resume(struct device *dev) +{ + struct hnae3_ae_dev *ae_dev = dev_get_drvdata(dev); + + if (ae_dev && hns3_is_phys_func(ae_dev->pdev)) { + dev_info(dev, "Begin to resume.\n"); + if (ae_dev->ops && ae_dev->ops->reset_done) + ae_dev->ops->reset_done(ae_dev); + } + + return 0; +} + static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { @@ -2423,8 +2449,8 @@ static void hns3_reset_prepare(struct pci_dev *pdev) struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); dev_info(&pdev->dev, "FLR prepare\n"); - if (ae_dev && ae_dev->ops && ae_dev->ops->flr_prepare) - ae_dev->ops->flr_prepare(ae_dev); + if (ae_dev && ae_dev->ops && ae_dev->ops->reset_prepare) + ae_dev->ops->reset_prepare(ae_dev, HNAE3_FLR_RESET); } static void hns3_reset_done(struct pci_dev *pdev) @@ -2432,8 +2458,8 @@ static void hns3_reset_done(struct pci_dev *pdev) struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); dev_info(&pdev->dev, "FLR done\n"); - if (ae_dev && ae_dev->ops && ae_dev->ops->flr_done) - ae_dev->ops->flr_done(ae_dev); + if (ae_dev && ae_dev->ops && ae_dev->ops->reset_done) + ae_dev->ops->reset_done(ae_dev); } static const struct pci_error_handlers hns3_err_handler = { @@ -2443,12 +2469,15 @@ static const struct pci_error_handlers hns3_err_handler = { .reset_done = hns3_reset_done, }; +static SIMPLE_DEV_PM_OPS(hns3_pm_ops, hns3_suspend, hns3_resume); + static struct pci_driver hns3_driver = { .name = hns3_driver_name, .id_table = hns3_pci_tbl, .probe = hns3_probe, .remove = hns3_remove, .shutdown = hns3_shutdown, + .driver.pm = &hns3_pm_ops, .sriov_configure = hns3_pci_sriov_configure, .err_handler = &hns3_err_handler, }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index bc805d5fb16e..c296ab64fb0a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2880,6 +2880,28 @@ static int hclge_get_mac_phy_link(struct hclge_dev *hdev, int *link_status) return hclge_get_mac_link_status(hdev, link_status); } +static void hclge_push_link_status(struct hclge_dev *hdev) +{ + struct hclge_vport *vport; + int ret; + u16 i; + + for (i = 0; i < pci_num_vf(hdev->pdev); i++) { + vport = &hdev->vport[i + HCLGE_VF_VPORT_START_NUM]; + + if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state) || + vport->vf_info.link_state != IFLA_VF_LINK_STATE_AUTO) + continue; + + ret = hclge_push_vf_link_status(vport); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to push link status to vf%u, ret = %d\n", + i, ret); + } + } +} + static void hclge_update_link_status(struct hclge_dev *hdev) { struct hnae3_handle *rhandle = &hdev->vport[0].roce; @@ -2908,6 +2930,7 @@ static void hclge_update_link_status(struct hclge_dev *hdev) rclient->ops->link_status_change(rhandle, state); hdev->hw.mac.link = state; + hclge_push_link_status(hdev); } clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); @@ -3246,14 +3269,24 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + int link_state_old; + int ret; vport = hclge_get_vf_vport(hdev, vf); if (!vport) return -EINVAL; + link_state_old = vport->vf_info.link_state; vport->vf_info.link_state = link_state; - return 0; + ret = hclge_push_vf_link_status(vport); + if (ret) { + vport->vf_info.link_state = link_state_old; + dev_err(&hdev->pdev->dev, + "failed to push vf%d link status, ret = %d\n", vf, ret); + } + + return ret; } static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) @@ -4118,7 +4151,6 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle) * normalcy is to reset. * 2. A new reset request from the stack due to timeout * - * For the first case,error event might not have ae handle available. * check if this is a new reset request and we are not here just because * last reset attempt did not succeed and watchdog hit us again. We will * know this if last reset request did not occur very recently (watchdog @@ -4128,14 +4160,14 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle) * want to make sure we throttle the reset request. Therefore, we will * not allow it again before 3*HZ times. */ - if (!handle) - handle = &hdev->vport[0].nic; if (time_before(jiffies, (hdev->last_reset_time + HCLGE_RESET_INTERVAL))) { mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL); return; - } else if (hdev->default_reset_request) { + } + + if (hdev->default_reset_request) { hdev->reset_level = hclge_get_reset_level(ae_dev, &hdev->default_reset_request); @@ -11058,10 +11090,11 @@ static void hclge_state_uninit(struct hclge_dev *hdev) cancel_delayed_work_sync(&hdev->service_task); } -static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) +static void hclge_reset_prepare_general(struct hnae3_ae_dev *ae_dev, + enum hnae3_reset_type rst_type) { -#define HCLGE_FLR_RETRY_WAIT_MS 500 -#define HCLGE_FLR_RETRY_CNT 5 +#define HCLGE_RESET_RETRY_WAIT_MS 500 +#define HCLGE_RESET_RETRY_CNT 5 struct hclge_dev *hdev = ae_dev->priv; int retry_cnt = 0; @@ -11070,30 +11103,32 @@ static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) retry: down(&hdev->reset_sem); set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); - hdev->reset_type = HNAE3_FLR_RESET; + hdev->reset_type = rst_type; ret = hclge_reset_prepare(hdev); if (ret || hdev->reset_pending) { - dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n", ret); if (hdev->reset_pending || - retry_cnt++ < HCLGE_FLR_RETRY_CNT) { + retry_cnt++ < HCLGE_RESET_RETRY_CNT) { dev_err(&hdev->pdev->dev, "reset_pending:0x%lx, retry_cnt:%d\n", hdev->reset_pending, retry_cnt); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); up(&hdev->reset_sem); - msleep(HCLGE_FLR_RETRY_WAIT_MS); + msleep(HCLGE_RESET_RETRY_WAIT_MS); goto retry; } } - /* disable misc vector before FLR done */ + /* disable misc vector before reset done */ hclge_enable_vector(&hdev->misc_vector, false); set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - hdev->rst_stats.flr_rst_cnt++; + + if (hdev->reset_type == HNAE3_FLR_RESET) + hdev->rst_stats.flr_rst_cnt++; } -static void hclge_flr_done(struct hnae3_ae_dev *ae_dev) +static void hclge_reset_done(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; int ret; @@ -11786,7 +11821,7 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, if (ret) return ret; - /* RSS indirection table has been configuared by user */ + /* RSS indirection table has been configured by user */ if (rxfh_configured) goto out; @@ -12466,8 +12501,8 @@ static int hclge_get_module_eeprom(struct hnae3_handle *handle, u32 offset, static const struct hnae3_ae_ops hclge_ops = { .init_ae_dev = hclge_init_ae_dev, .uninit_ae_dev = hclge_uninit_ae_dev, - .flr_prepare = hclge_flr_prepare, - .flr_done = hclge_flr_done, + .reset_prepare = hclge_reset_prepare_general, + .reset_done = hclge_reset_done, .init_client_instance = hclge_init_client_instance, .uninit_client_instance = hclge_uninit_client_instance, .map_ring_to_vector = hclge_map_ring_to_vector, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index c1aaf7c534c9..ff1d47308c2d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -1089,4 +1089,5 @@ void hclge_report_hw_error(struct hclge_dev *hdev, enum hnae3_hw_error_type type); void hclge_inform_vf_promisc_info(struct hclge_vport *vport); void hclge_dbg_dump_rst_info(struct hclge_dev *hdev); +int hclge_push_vf_link_status(struct hclge_vport *vport); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index c88607bdda59..5512ffe0a149 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -490,16 +490,14 @@ static void hclge_get_vf_media_type(struct hclge_vport *vport, resp_msg->len = HCLGE_VF_MEDIA_TYPE_LENGTH; } -static int hclge_get_link_info(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +int hclge_push_vf_link_status(struct hclge_vport *vport) { #define HCLGE_VF_LINK_STATE_UP 1U #define HCLGE_VF_LINK_STATE_DOWN 0U struct hclge_dev *hdev = vport->back; u16 link_status; - u8 msg_data[8]; - u8 dest_vfid; + u8 msg_data[9]; u16 duplex; /* mac.link can only be 0 or 1 */ @@ -520,11 +518,11 @@ static int hclge_get_link_info(struct hclge_vport *vport, memcpy(&msg_data[0], &link_status, sizeof(u16)); memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32)); memcpy(&msg_data[6], &duplex, sizeof(u16)); - dest_vfid = mbx_req->mbx_src_vfid; + msg_data[8] = HCLGE_MBX_PUSH_LINK_STATUS_EN; /* send this requested info to VF */ return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data), - HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid); + HCLGE_MBX_LINK_STAT_CHANGE, vport->vport_id); } static void hclge_get_link_mode(struct hclge_vport *vport, @@ -794,7 +792,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev) hclge_get_vf_tcinfo(vport, &resp_msg); break; case HCLGE_MBX_GET_LINK_STATUS: - ret = hclge_get_link_info(vport, req); + ret = hclge_push_vf_link_status(vport); if (ret) dev_err(&hdev->pdev->dev, "failed to inform link stat to VF, ret = %d\n", diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 1682769112d0..0db51ef15ef6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2114,10 +2114,11 @@ static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en) writel(en ? 1 : 0, vector->addr); } -static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev) +static void hclgevf_reset_prepare_general(struct hnae3_ae_dev *ae_dev, + enum hnae3_reset_type rst_type) { -#define HCLGEVF_FLR_RETRY_WAIT_MS 500 -#define HCLGEVF_FLR_RETRY_CNT 5 +#define HCLGEVF_RESET_RETRY_WAIT_MS 500 +#define HCLGEVF_RESET_RETRY_CNT 5 struct hclgevf_dev *hdev = ae_dev->priv; int retry_cnt = 0; @@ -2126,29 +2127,31 @@ static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev) retry: down(&hdev->reset_sem); set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); - hdev->reset_type = HNAE3_FLR_RESET; + hdev->reset_type = rst_type; ret = hclgevf_reset_prepare(hdev); if (ret) { - dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n", ret); if (hdev->reset_pending || - retry_cnt++ < HCLGEVF_FLR_RETRY_CNT) { + retry_cnt++ < HCLGEVF_RESET_RETRY_CNT) { dev_err(&hdev->pdev->dev, "reset_pending:0x%lx, retry_cnt:%d\n", hdev->reset_pending, retry_cnt); clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); up(&hdev->reset_sem); - msleep(HCLGEVF_FLR_RETRY_WAIT_MS); + msleep(HCLGEVF_RESET_RETRY_WAIT_MS); goto retry; } } - /* disable misc vector before FLR done */ + /* disable misc vector before reset done */ hclgevf_enable_vector(&hdev->misc_vector, false); - hdev->rst_stats.flr_rst_cnt++; + + if (hdev->reset_type == HNAE3_FLR_RESET) + hdev->rst_stats.flr_rst_cnt++; } -static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev) +static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev) { struct hclgevf_dev *hdev = ae_dev->priv; int ret; @@ -2223,7 +2226,7 @@ static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) if (test_and_clear_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) { - /* PF has initmated that it is about to reset the hardware. + /* PF has intimated that it is about to reset the hardware. * We now have to poll & check if hardware has actually * completed the reset sequence. On hardware reset completion, * VF needs to reset the client and ae device. @@ -2337,10 +2340,11 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev) if (!(hdev->serv_processed_cnt % HCLGEVF_STATS_TIMER_INTERVAL)) hclgevf_tqps_update_stats(handle); - /* request the link status from the PF. PF would be able to tell VF - * about such updates in future so we might remove this later + /* VF does not need to request link status when this bit is set, because + * PF will push its link status to VFs when link status changed. */ - hclgevf_request_link_info(hdev); + if (!test_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS, &hdev->state)) + hclgevf_request_link_info(hdev); hclgevf_update_link_mode(hdev); @@ -2653,14 +2657,15 @@ static int hclgevf_ae_start(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + clear_bit(HCLGEVF_STATE_DOWN, &hdev->state); + clear_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS, &hdev->state); + hclgevf_reset_tqp_stats(handle); hclgevf_request_link_info(hdev); hclgevf_update_link_mode(hdev); - clear_bit(HCLGEVF_STATE_DOWN, &hdev->state); - return 0; } @@ -3523,7 +3528,7 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, if (ret) return ret; - /* RSS indirection table has been configuared by user */ + /* RSS indirection table has been configured by user */ if (rxfh_configured) goto out; @@ -3641,7 +3646,7 @@ static void hclgevf_get_link_mode(struct hnae3_handle *handle, } #define MAX_SEPARATE_NUM 4 -#define SEPARATOR_VALUE 0xFFFFFFFF +#define SEPARATOR_VALUE 0xFDFCFBFA #define REG_NUM_PER_LINE 4 #define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(u32)) @@ -3748,8 +3753,8 @@ void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state, static const struct hnae3_ae_ops hclgevf_ops = { .init_ae_dev = hclgevf_init_ae_dev, .uninit_ae_dev = hclgevf_uninit_ae_dev, - .flr_prepare = hclgevf_flr_prepare, - .flr_done = hclgevf_flr_done, + .reset_prepare = hclgevf_reset_prepare_general, + .reset_done = hclgevf_reset_done, .init_client_instance = hclgevf_init_client_instance, .uninit_client_instance = hclgevf_uninit_client_instance, .start = hclgevf_ae_start, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index ade6e7f5be5b..265c9b0b4728 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -152,6 +152,7 @@ enum hclgevf_states { HCLGEVF_STATE_LINK_UPDATING, HCLGEVF_STATE_PROMISC_CHANGED, HCLGEVF_STATE_RST_FAIL, + HCLGEVF_STATE_PF_PUSH_LINK_STATUS, }; struct hclgevf_mac { @@ -176,9 +177,9 @@ struct hclgevf_hw { /* TQP stats */ struct hlcgevf_tqp_stats { - /* query_tqp_tx_queue_statistics ,opcode id: 0x0B03 */ + /* query_tqp_tx_queue_statistics, opcode id: 0x0B03 */ u64 rcb_tx_ring_pktnum_rcd; /* 32bit */ - /* query_tqp_rx_queue_statistics ,opcode id: 0x0B13 */ + /* query_tqp_rx_queue_statistics, opcode id: 0x0B13 */ u64 rcb_rx_ring_pktnum_rcd; /* 32bit */ }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 5b2dcd97c107..9b17735b9f4c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -276,6 +276,7 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) u8 duplex; u32 speed; u32 tail; + u8 flag; u8 idx; /* we can safely clear it now as we are at start of the async message @@ -300,11 +301,16 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) link_status = msg_q[1]; memcpy(&speed, &msg_q[2], sizeof(speed)); duplex = (u8)msg_q[4]; + flag = (u8)msg_q[5]; /* update upper layer with new link link status */ hclgevf_update_link_status(hdev, link_status); hclgevf_update_speed_duplex(hdev, speed, duplex); + if (flag & HCLGE_MBX_PUSH_LINK_STATUS_EN) + set_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS, + &hdev->state); + break; case HCLGE_MBX_LINK_STAT_MODE: idx = (u8)msg_q[1]; diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index c2e740475786..ea55314b209d 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -109,6 +109,7 @@ static const struct of_device_id ehea_device_table[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, ehea_device_table); static struct platform_driver ehea_driver = { .driver = { diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 473411542911..5788bb956d73 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -827,6 +827,30 @@ static void release_napi(struct ibmvnic_adapter *adapter) adapter->napi_enabled = false; } +static const char *adapter_state_to_string(enum vnic_state state) +{ + switch (state) { + case VNIC_PROBING: + return "PROBING"; + case VNIC_PROBED: + return "PROBED"; + case VNIC_OPENING: + return "OPENING"; + case VNIC_OPEN: + return "OPEN"; + case VNIC_CLOSING: + return "CLOSING"; + case VNIC_CLOSED: + return "CLOSED"; + case VNIC_REMOVING: + return "REMOVING"; + case VNIC_REMOVED: + return "REMOVED"; + default: + return "UNKNOWN"; + } +} + static int ibmvnic_login(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); @@ -905,7 +929,7 @@ static int ibmvnic_login(struct net_device *netdev) __ibmvnic_set_mac(netdev, adapter->mac_addr); - netdev_dbg(netdev, "[S:%d] Login succeeded\n", adapter->state); + netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state)); return 0; } @@ -1149,19 +1173,13 @@ static int __ibmvnic_open(struct net_device *netdev) rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); if (rc) { - for (i = 0; i < adapter->req_rx_queues; i++) - napi_disable(&adapter->napi[i]); + ibmvnic_napi_disable(adapter); release_resources(adapter); return rc; } netif_tx_start_all_queues(netdev); - if (prev_state == VNIC_CLOSED) { - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - } - adapter->state = VNIC_OPEN; return rc; } @@ -1185,8 +1203,9 @@ static int ibmvnic_open(struct net_device *netdev) * honor our setting below. */ if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) { - netdev_dbg(netdev, "[S:%d FOP:%d] Resetting, deferring open\n", - adapter->state, adapter->failover_pending); + netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending); adapter->state = VNIC_OPEN; rc = 0; goto out; @@ -1350,8 +1369,9 @@ static int ibmvnic_close(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); int rc; - netdev_dbg(netdev, "[S:%d FOP:%d FRR:%d] Closing\n", - adapter->state, adapter->failover_pending, + netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, adapter->force_reset_recovery); /* If device failover is pending, just set device state and return. @@ -1911,6 +1931,26 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) return rc; } +static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason) +{ + switch (reason) { + case VNIC_RESET_FAILOVER: + return "FAILOVER"; + case VNIC_RESET_MOBILITY: + return "MOBILITY"; + case VNIC_RESET_FATAL: + return "FATAL"; + case VNIC_RESET_NON_FATAL: + return "NON_FATAL"; + case VNIC_RESET_TIMEOUT: + return "TIMEOUT"; + case VNIC_RESET_CHANGE_PARAM: + return "CHANGE_PARAM"; + default: + return "UNKNOWN"; + } +} + /* * do_reset returns zero if we are able to keep processing reset events, or * non-zero if we hit a fatal error and must halt. @@ -1921,12 +1961,14 @@ static int do_reset(struct ibmvnic_adapter *adapter, u64 old_num_rx_queues, old_num_tx_queues; u64 old_num_rx_slots, old_num_tx_slots; struct net_device *netdev = adapter->netdev; - int i, rc; + int rc; netdev_dbg(adapter->netdev, - "[S:%d FOP:%d] Reset reason %d, reset_state %d\n", - adapter->state, adapter->failover_pending, - rwi->reset_reason, reset_state); + "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + reset_reason_to_string(rwi->reset_reason), + adapter_state_to_string(reset_state)); adapter->reset_reason = rwi->reset_reason; /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */ @@ -1986,8 +2028,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, * from VNIC_CLOSING state. */ netdev_dbg(netdev, - "Open changed state from %d, updating.\n", - reset_state); + "Open changed state from %s, updating.\n", + adapter_state_to_string(reset_state)); reset_state = VNIC_OPEN; adapter->state = VNIC_CLOSING; } @@ -2110,10 +2152,6 @@ static int do_reset(struct ibmvnic_adapter *adapter, /* refresh device's multicast list */ ibmvnic_set_multi(netdev); - /* kick napi */ - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - if (adapter->reset_reason == VNIC_RESET_FAILOVER || adapter->reset_reason == VNIC_RESET_MOBILITY) __netdev_notify_peers(netdev); @@ -2128,8 +2166,9 @@ out: if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) rtnl_unlock(); - netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Reset done, rc %d\n", - adapter->state, adapter->failover_pending, rc); + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); return rc; } @@ -2139,8 +2178,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, struct net_device *netdev = adapter->netdev; int rc; - netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n", - rwi->reset_reason); + netdev_dbg(adapter->netdev, "Hard resetting driver (%s)\n", + reset_reason_to_string(rwi->reset_reason)); /* read the state and check (again) after getting rtnl */ reset_state = adapter->state; @@ -2206,8 +2245,9 @@ out: /* restore adapter state if reset failed */ if (rc) adapter->state = reset_state; - netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Hard reset done, rc %d\n", - adapter->state, adapter->failover_pending, rc); + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); return rc; } @@ -2242,8 +2282,9 @@ static void __ibmvnic_reset(struct work_struct *work) adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); if (test_and_set_bit_lock(0, &adapter->resetting)) { - schedule_delayed_work(&adapter->ibmvnic_delayed_reset, - IBMVNIC_RESET_DELAY); + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); return; } @@ -2286,8 +2327,8 @@ static void __ibmvnic_reset(struct work_struct *work) if (rc) { /* give backing device time to settle down */ netdev_dbg(adapter->netdev, - "[S:%d] Hard reset failed, waiting 60 secs\n", - adapter->state); + "[S:%s] Hard reset failed, waiting 60 secs\n", + adapter_state_to_string(adapter->state)); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(60 * HZ); } @@ -2315,8 +2356,9 @@ static void __ibmvnic_reset(struct work_struct *work) clear_bit_unlock(0, &adapter->resetting); netdev_dbg(adapter->netdev, - "[S:%d FRR:%d WFR:%d] Done processing resets\n", - adapter->state, adapter->force_reset_recovery, + "[S:%s FRR:%d WFR:%d] Done processing resets\n", + adapter_state_to_string(adapter->state), + adapter->force_reset_recovery, adapter->wait_for_reset); } @@ -2363,8 +2405,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, list_for_each(entry, &adapter->rwi_list) { tmp = list_entry(entry, struct ibmvnic_rwi, list); if (tmp->reset_reason == reason) { - netdev_dbg(netdev, "Skipping matching reset, reason=%d\n", - reason); + netdev_dbg(netdev, "Skipping matching reset, reason=%s\n", + reset_reason_to_string(reason)); ret = EBUSY; goto err; } @@ -2384,8 +2426,9 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, } rwi->reset_reason = reason; list_add_tail(&rwi->list, &adapter->rwi_list); - netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason); - schedule_work(&adapter->ibmvnic_reset); + netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n", + reset_reason_to_string(reason)); + queue_work(system_long_wq, &adapter->ibmvnic_reset); ret = 0; err: @@ -3203,9 +3246,6 @@ restart_loop: next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { - if (next->tx_comp.rcs[i]) - dev_err(dev, "tx error %x\n", - next->tx_comp.rcs[i]); index = be32_to_cpu(next->tx_comp.correlators[i]); if (index & IBMVNIC_TSO_POOL_MASK) { tx_pool = &adapter->tso_pool[pool]; @@ -3219,7 +3259,13 @@ restart_loop: num_entries += txbuff->num_entries; if (txbuff->skb) { total_bytes += txbuff->skb->len; - dev_consume_skb_irq(txbuff->skb); + if (next->tx_comp.rcs[i]) { + dev_err(dev, "tx error %x\n", + next->tx_comp.rcs[i]); + dev_kfree_skb_irq(txbuff->skb); + } else { + dev_consume_skb_irq(txbuff->skb); + } txbuff->skb = NULL; } else { netdev_warn(adapter->netdev, @@ -5451,7 +5497,7 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, if (rc) { netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n", rc); - return -EINVAL; + goto last_resort; } session_token = (__be64)retbuf[0]; @@ -5459,15 +5505,17 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, be64_to_cpu(session_token)); rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, H_SESSION_ERR_DETECTED, session_token, 0, 0); - if (rc) { - netdev_err(netdev, "Client initiated failover failed, rc %ld\n", + if (rc) + netdev_err(netdev, + "H_VIOCTL initiated failover failed, rc %ld\n", rc); - return -EINVAL; - } + +last_resort: + netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n"); + ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); return count; } - static DEVICE_ATTR_WO(failover); static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 806aa75a4e86..c1d39a748546 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -412,77 +412,6 @@ struct ibmvnic_control_ip_offload { struct ibmvnic_rc rc; } __packed __aligned(8); -struct ibmvnic_request_dump_size { - u8 first; - u8 cmd; - u8 reserved[6]; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_request_dump { - u8 first; - u8 cmd; - u8 reserved1[2]; - __be32 ioba; - __be32 len; - u8 reserved2[4]; -} __packed __aligned(8); - -struct ibmvnic_request_dump_rsp { - u8 first; - u8 cmd; - u8 reserved[6]; - __be32 dumped_len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_request_ras_comp_num { - u8 first; - u8 cmd; - u8 reserved1[2]; - __be32 num_components; - u8 reserved2[4]; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_request_ras_comps { - u8 first; - u8 cmd; - u8 reserved[2]; - __be32 ioba; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_control_ras { - u8 first; - u8 cmd; - u8 correlator; - u8 level; - u8 op; -#define IBMVNIC_TRACE_LEVEL 1 -#define IBMVNIC_ERROR_LEVEL 2 -#define IBMVNIC_TRACE_PAUSE 3 -#define IBMVNIC_TRACE_RESUME 4 -#define IBMVNIC_TRACE_ON 5 -#define IBMVNIC_TRACE_OFF 6 -#define IBMVNIC_CHG_TRACE_BUFF_SZ 7 - u8 trace_buff_sz[3]; - u8 reserved[4]; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_collect_fw_trace { - u8 first; - u8 cmd; - u8 correlator; - u8 reserved; - __be32 ioba; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - struct ibmvnic_request_statistics { u8 first; u8 cmd; @@ -494,15 +423,6 @@ struct ibmvnic_request_statistics { u8 reserved[4]; } __packed __aligned(8); -struct ibmvnic_request_debug_stats { - u8 first; - u8 cmd; - u8 reserved[2]; - __be32 ioba; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - struct ibmvnic_error_indication { u8 first; u8 cmd; @@ -677,22 +597,8 @@ union ibmvnic_crq { struct ibmvnic_query_ip_offload query_ip_offload_rsp; struct ibmvnic_control_ip_offload control_ip_offload; struct ibmvnic_control_ip_offload control_ip_offload_rsp; - struct ibmvnic_request_dump_size request_dump_size; - struct ibmvnic_request_dump_size request_dump_size_rsp; - struct ibmvnic_request_dump request_dump; - struct ibmvnic_request_dump_rsp request_dump_rsp; - struct ibmvnic_request_ras_comp_num request_ras_comp_num; - struct ibmvnic_request_ras_comp_num request_ras_comp_num_rsp; - struct ibmvnic_request_ras_comps request_ras_comps; - struct ibmvnic_request_ras_comps request_ras_comps_rsp; - struct ibmvnic_control_ras control_ras; - struct ibmvnic_control_ras control_ras_rsp; - struct ibmvnic_collect_fw_trace collect_fw_trace; - struct ibmvnic_collect_fw_trace collect_fw_trace_rsp; struct ibmvnic_request_statistics request_statistics; struct ibmvnic_generic_crq request_statistics_rsp; - struct ibmvnic_request_debug_stats request_debug_stats; - struct ibmvnic_request_debug_stats request_debug_stats_rsp; struct ibmvnic_error_indication error_indication; struct ibmvnic_link_state_indication link_state_indication; struct ibmvnic_change_mac_addr change_mac_addr; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 5aa86318ed3e..c1d155690341 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -294,6 +294,7 @@ config ICE tristate "Intel(R) Ethernet Connection E800 Series Support" default n depends on PCI_MSI + select DIMLIB select NET_DEVLINK select PLDMFW help diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index cd53981fa5e0..15f93b355099 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -142,6 +142,7 @@ enum i40e_state_t { __I40E_VIRTCHNL_OP_PENDING, __I40E_RECOVERY_MODE, __I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */ + __I40E_VFS_RELEASING, /* This must be last as it determines the size of the BITMAP */ __I40E_STATE_SIZE__, }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index e8230da29f05..291e61ac3e44 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -578,6 +578,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, case RING_TYPE_XDP: ring = kmemdup(vsi->xdp_rings[ring_id], sizeof(*ring), GFP_KERNEL); break; + default: + ring = NULL; + break; } if (!ring) return; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c4c167650b6b..040a01400b85 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -232,6 +232,8 @@ static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[], I40E_STAT(struct i40e_vsi, _name, _stat) #define I40E_VEB_STAT(_name, _stat) \ I40E_STAT(struct i40e_veb, _name, _stat) +#define I40E_VEB_TC_STAT(_name, _stat) \ + I40E_STAT(struct i40e_cp_veb_tc_stats, _name, _stat) #define I40E_PFC_STAT(_name, _stat) \ I40E_STAT(struct i40e_pfc_stats, _name, _stat) #define I40E_QUEUE_STAT(_name, _stat) \ @@ -266,11 +268,18 @@ static const struct i40e_stats i40e_gstrings_veb_stats[] = { I40E_VEB_STAT("veb.rx_unknown_protocol", stats.rx_unknown_protocol), }; +struct i40e_cp_veb_tc_stats { + u64 tc_rx_packets; + u64 tc_rx_bytes; + u64 tc_tx_packets; + u64 tc_tx_bytes; +}; + static const struct i40e_stats i40e_gstrings_veb_tc_stats[] = { - I40E_VEB_STAT("veb.tc_%u_tx_packets", tc_stats.tc_tx_packets), - I40E_VEB_STAT("veb.tc_%u_tx_bytes", tc_stats.tc_tx_bytes), - I40E_VEB_STAT("veb.tc_%u_rx_packets", tc_stats.tc_rx_packets), - I40E_VEB_STAT("veb.tc_%u_rx_bytes", tc_stats.tc_rx_bytes), + I40E_VEB_TC_STAT("veb.tc_%u_tx_packets", tc_tx_packets), + I40E_VEB_TC_STAT("veb.tc_%u_tx_bytes", tc_tx_bytes), + I40E_VEB_TC_STAT("veb.tc_%u_rx_packets", tc_rx_packets), + I40E_VEB_TC_STAT("veb.tc_%u_rx_bytes", tc_rx_bytes), }; static const struct i40e_stats i40e_gstrings_misc_stats[] = { @@ -1101,6 +1110,7 @@ static int i40e_get_link_ksettings(struct net_device *netdev, /* Set flow control settings */ ethtool_link_ksettings_add_link_mode(ks, supported, Pause); + ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause); switch (hw->fc.requested_mode) { case I40E_FC_FULL: @@ -2217,6 +2227,29 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset) } /** + * i40e_get_veb_tc_stats - copy VEB TC statistics to formatted structure + * @tc: the TC statistics in VEB structure (veb->tc_stats) + * @i: the index of traffic class in (veb->tc_stats) structure to copy + * + * Copy VEB TC statistics from structure of arrays (veb->tc_stats) to + * one dimensional structure i40e_cp_veb_tc_stats. + * Produce formatted i40e_cp_veb_tc_stats structure of the VEB TC + * statistics for the given TC. + **/ +static struct i40e_cp_veb_tc_stats +i40e_get_veb_tc_stats(struct i40e_veb_tc_stats *tc, unsigned int i) +{ + struct i40e_cp_veb_tc_stats veb_tc = { + .tc_rx_packets = tc->tc_rx_packets[i], + .tc_rx_bytes = tc->tc_rx_bytes[i], + .tc_tx_packets = tc->tc_tx_packets[i], + .tc_tx_bytes = tc->tc_tx_bytes[i], + }; + + return veb_tc; +} + +/** * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure * @pf: the PF device structure * @i: the priority value to copy @@ -2300,8 +2333,16 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, i40e_gstrings_veb_stats); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) - i40e_add_ethtool_stats(&data, veb_stats ? veb : NULL, - i40e_gstrings_veb_tc_stats); + if (veb_stats) { + struct i40e_cp_veb_tc_stats veb_tc = + i40e_get_veb_tc_stats(&veb->tc_stats, i); + + i40e_add_ethtool_stats(&data, &veb_tc, + i40e_gstrings_veb_tc_stats); + } else { + i40e_add_ethtool_stats(&data, NULL, + i40e_gstrings_veb_tc_stats); + } i40e_add_ethtool_stats(&data, pf, i40e_gstrings_stats); @@ -5433,7 +5474,7 @@ static int i40e_get_module_eeprom(struct net_device *netdev, status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - true, addr, offset, &value, NULL); + addr, true, offset, &value, NULL); if (status) return -EIO; data[i] = value; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1555d6009bf5..687ef52a8116 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2560,8 +2560,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) i40e_stat_str(hw, aq_ret), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { - dev_info(&pf->pdev->dev, "%s is %s allmulti mode.\n", - vsi->netdev->name, + dev_info(&pf->pdev->dev, "%s allmulti mode.\n", cur_multipromisc ? "entering" : "leaving"); } } @@ -6738,9 +6737,9 @@ out: set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } - /* registers are set, lets apply */ - if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) - ret = i40e_hw_set_dcb_config(pf, new_cfg); + /* registers are set, lets apply */ + if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) + ret = i40e_hw_set_dcb_config(pf, new_cfg); } err: @@ -10573,12 +10572,6 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) goto end_core_reset; } - if (!lock_acquired) - rtnl_lock(); - ret = i40e_setup_pf_switch(pf, reinit); - if (ret) - goto end_unlock; - #ifdef CONFIG_I40E_DCB /* Enable FW to write a default DCB config on link-up * unless I40E_FLAG_TC_MQPRIO was enabled or DCB @@ -10593,7 +10586,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) i40e_aq_set_dcb_parameters(hw, false, NULL); dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + pf->flags &= ~I40E_FLAG_DCB_CAPABLE; } else { i40e_aq_set_dcb_parameters(hw, true, NULL); ret = i40e_init_pf_dcb(pf); @@ -10607,6 +10600,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } #endif /* CONFIG_I40E_DCB */ + if (!lock_acquired) + rtnl_lock(); + ret = i40e_setup_pf_switch(pf, reinit); + if (ret) + goto end_unlock; /* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic. @@ -12359,6 +12357,7 @@ static int i40e_sw_init(struct i40e_pf *pf) { int err = 0; int size; + u16 pow; /* Set default capability flags */ pf->flags = I40E_FLAG_RX_CSUM_ENABLED | @@ -12377,6 +12376,11 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->rss_table_size = pf->hw.func_caps.rss_table_size; pf->rss_size_max = min_t(int, pf->rss_size_max, pf->hw.func_caps.num_tx_qp); + + /* find the next higher power-of-2 of num cpus */ + pow = roundup_pow_of_two(num_online_cpus()); + pf->rss_size_max = min_t(int, pf->rss_size_max, pow); + if (pf->hw.func_caps.rss) { pf->flags |= I40E_FLAG_RSS_ENABLED; pf->alloc_rss_size = min_t(int, pf->rss_size_max, @@ -15140,12 +15144,16 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw) * in order to register the netdev */ v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN); - if (v_idx < 0) + if (v_idx < 0) { + err = v_idx; goto err_switch_setup; + } pf->lan_vsi = v_idx; vsi = pf->vsi[v_idx]; - if (!vsi) + if (!vsi) { + err = -EFAULT; goto err_switch_setup; + } vsi->alloc_queue_pairs = 1; err = i40e_config_netdev(vsi); if (err) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index fc20afc23bfa..121cd99fdeff 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2295,8 +2295,7 @@ int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring) * @rx_ring: Rx ring being processed * @xdp: XDP buffer containing the frame **/ -static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, - struct xdp_buff *xdp) +static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { int err, result = I40E_XDP_PASS; struct i40e_ring *xdp_ring; @@ -2335,7 +2334,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, } xdp_out: rcu_read_unlock(); - return ERR_PTR(-result); + return result; } /** @@ -2448,6 +2447,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) unsigned int xdp_xmit = 0; bool failure = false; struct xdp_buff xdp; + int xdp_res = 0; #if (PAGE_SIZE < 8192) frame_sz = i40e_rx_frame_truesize(rx_ring, 0); @@ -2513,12 +2513,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size); #endif - skb = i40e_run_xdp(rx_ring, &xdp); + xdp_res = i40e_run_xdp(rx_ring, &xdp); } - if (IS_ERR(skb)) { - unsigned int xdp_res = -PTR_ERR(skb); - + if (xdp_res) { if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { xdp_xmit |= xdp_res; i40e_rx_buffer_flip(rx_ring, rx_buffer, size); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1b6ec9be155a..5d301a466f5c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -137,6 +137,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) **/ static inline void i40e_vc_disable_vf(struct i40e_vf *vf) { + struct i40e_pf *pf = vf->pf; int i; i40e_vc_notify_vf_reset(vf); @@ -147,6 +148,11 @@ static inline void i40e_vc_disable_vf(struct i40e_vf *vf) * ensure a reset. */ for (i = 0; i < 20; i++) { + /* If PF is in VFs releasing state reset VF is impossible, + * so leave it. + */ + if (test_bit(__I40E_VFS_RELEASING, pf->state)) + return; if (i40e_reset_vf(vf, false)) return; usleep_range(10000, 20000); @@ -1574,6 +1580,8 @@ void i40e_free_vfs(struct i40e_pf *pf) if (!pf->vf) return; + + set_bit(__I40E_VFS_RELEASING, pf->state); while (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) usleep_range(1000, 2000); @@ -1631,6 +1639,7 @@ void i40e_free_vfs(struct i40e_pf *pf) } } clear_bit(__I40E_VF_DISABLE, pf->state); + clear_bit(__I40E_VFS_RELEASING, pf->state); } #ifdef CONFIG_PCI_IOV diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index d89c22347d9d..46d884417c63 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -474,7 +474,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget); if (!nb_pkts) - return false; + return true; if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; @@ -491,7 +491,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes); - return true; + return nb_pkts < budget; } /** diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 02badaaf818c..7ae10fd87265 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -36,6 +36,7 @@ #include <linux/bpf.h> #include <linux/avf/virtchnl.h> #include <linux/cpu_rmap.h> +#include <linux/dim.h> #include <net/devlink.h> #include <net/ipv6.h> #include <net/xdp_sock.h> @@ -44,6 +45,9 @@ #include <net/gre.h> #include <net/udp_tunnel.h> #include <net/vxlan.h> +#if IS_ENABLED(CONFIG_DCB) +#include <scsi/iscsi_proto.h> +#endif /* CONFIG_DCB */ #include "ice_devids.h" #include "ice_type.h" #include "ice_txrx.h" @@ -194,51 +198,52 @@ struct ice_sw { }; enum ice_pf_state { - __ICE_TESTING, - __ICE_DOWN, - __ICE_NEEDS_RESTART, - __ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ - __ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ - __ICE_DCBNL_DEVRESET, /* set by dcbnl devreset */ - __ICE_PFR_REQ, /* set by driver and peers */ - __ICE_CORER_REQ, /* set by driver and peers */ - __ICE_GLOBR_REQ, /* set by driver and peers */ - __ICE_CORER_RECV, /* set by OICR handler */ - __ICE_GLOBR_RECV, /* set by OICR handler */ - __ICE_EMPR_RECV, /* set by OICR handler */ - __ICE_SUSPENDED, /* set on module remove path */ - __ICE_RESET_FAILED, /* set by reset/rebuild */ + ICE_TESTING, + ICE_DOWN, + ICE_NEEDS_RESTART, + ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ + ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ + ICE_PFR_REQ, /* set by driver and peers */ + ICE_CORER_REQ, /* set by driver and peers */ + ICE_GLOBR_REQ, /* set by driver and peers */ + ICE_CORER_RECV, /* set by OICR handler */ + ICE_GLOBR_RECV, /* set by OICR handler */ + ICE_EMPR_RECV, /* set by OICR handler */ + ICE_SUSPENDED, /* set on module remove path */ + ICE_RESET_FAILED, /* set by reset/rebuild */ /* When checking for the PF to be in a nominal operating state, the * bits that are grouped at the beginning of the list need to be - * checked. Bits occurring before __ICE_STATE_NOMINAL_CHECK_BITS will + * checked. Bits occurring before ICE_STATE_NOMINAL_CHECK_BITS will * be checked. If you need to add a bit into consideration for nominal * operating state, it must be added before - * __ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position + * ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position * without appropriate consideration. */ - __ICE_STATE_NOMINAL_CHECK_BITS, - __ICE_ADMINQ_EVENT_PENDING, - __ICE_MAILBOXQ_EVENT_PENDING, - __ICE_MDD_EVENT_PENDING, - __ICE_VFLR_EVENT_PENDING, - __ICE_FLTR_OVERFLOW_PROMISC, - __ICE_VF_DIS, - __ICE_CFG_BUSY, - __ICE_SERVICE_SCHED, - __ICE_SERVICE_DIS, - __ICE_FD_FLUSH_REQ, - __ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */ - __ICE_MDD_VF_PRINT_PENDING, /* set when MDD event handle */ - __ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */ - __ICE_LINK_DEFAULT_OVERRIDE_PENDING, - __ICE_PHY_INIT_COMPLETE, - __ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ - __ICE_STATE_NBITS /* must be last */ + ICE_STATE_NOMINAL_CHECK_BITS, + ICE_ADMINQ_EVENT_PENDING, + ICE_MAILBOXQ_EVENT_PENDING, + ICE_MDD_EVENT_PENDING, + ICE_VFLR_EVENT_PENDING, + ICE_FLTR_OVERFLOW_PROMISC, + ICE_VF_DIS, + ICE_CFG_BUSY, + ICE_SERVICE_SCHED, + ICE_SERVICE_DIS, + ICE_FD_FLUSH_REQ, + ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */ + ICE_MDD_VF_PRINT_PENDING, /* set when MDD event handle */ + ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */ + ICE_LINK_DEFAULT_OVERRIDE_PENDING, + ICE_PHY_INIT_COMPLETE, + ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ + ICE_STATE_NBITS /* must be last */ }; enum ice_vsi_state { ICE_VSI_DOWN, ICE_VSI_NEEDS_RESTART, + ICE_VSI_NETDEV_ALLOCD, + ICE_VSI_NETDEV_REGISTERED, ICE_VSI_UMAC_FLTR_CHANGED, ICE_VSI_MMAC_FLTR_CHANGED, ICE_VSI_VLAN_FLTR_CHANGED, @@ -265,7 +270,6 @@ struct ice_vsi { u32 tx_busy; u32 rx_buf_failed; u32 rx_page_failed; - u32 rx_gro_dropped; u16 num_q_vectors; u16 base_vector; /* IRQ base for OS reserved vectors */ enum ice_vsi_type type; @@ -348,7 +352,7 @@ struct ice_q_vector { u16 reg_idx; u8 num_ring_rx; /* total number of Rx rings in vector */ u8 num_ring_tx; /* total number of Tx rings in vector */ - u8 itr_countdown; /* when 0 should adjust adaptive ITR */ + u8 wb_on_itr:1; /* if true, WB on ITR is enabled */ /* in usecs, need to use ice_intrl_to_usecs_reg() before writing this * value to the device */ @@ -363,6 +367,8 @@ struct ice_q_vector { struct irq_affinity_notify affinity_notify; char name[ICE_INT_NAME_STR_LEN]; + + u16 total_events; /* net_dim(): number of interrupts processed */ } ____cacheline_internodealigned_in_smp; enum ice_pf_flags { @@ -420,7 +426,7 @@ struct ice_pf { u16 num_msix_per_vf; /* used to ratelimit the MDD event logging */ unsigned long last_printed_mdd_jiffies; - DECLARE_BITMAP(state, __ICE_STATE_NBITS); + DECLARE_BITMAP(state, ICE_STATE_NBITS); DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */ unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */ @@ -631,7 +637,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); const char *ice_stat_str(enum ice_status stat_err); const char *ice_aq_str(enum ice_aq_err aq_err); -bool ice_is_wol_supported(struct ice_pf *pf); +bool ice_is_wol_supported(struct ice_hw *hw); int ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add, bool is_tun); @@ -649,6 +655,7 @@ int ice_fdir_create_dflt_rules(struct ice_pf *pf); int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, struct ice_rq_event_info *event); int ice_open(struct net_device *netdev); +int ice_open_internal(struct net_device *netdev); int ice_stop(struct net_device *netdev); void ice_service_task_schedule(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index b9491ef5f21c..5cdfe406af84 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -877,16 +877,18 @@ struct ice_aqc_get_phy_caps { __le16 param0; /* 18.0 - Report qualified modules */ #define ICE_AQC_GET_PHY_RQM BIT(0) - /* 18.1 - 18.2 : Report mode - * 00b - Report NVM capabilities - * 01b - Report topology capabilities - * 10b - Report SW configured + /* 18.1 - 18.3 : Report mode + * 000b - Report NVM capabilities + * 001b - Report topology capabilities + * 010b - Report SW configured + * 100b - Report default capabilities */ -#define ICE_AQC_REPORT_MODE_S 1 -#define ICE_AQC_REPORT_MODE_M (3 << ICE_AQC_REPORT_MODE_S) -#define ICE_AQC_REPORT_NVM_CAP 0 -#define ICE_AQC_REPORT_TOPO_CAP BIT(1) -#define ICE_AQC_REPORT_SW_CFG BIT(2) +#define ICE_AQC_REPORT_MODE_S 1 +#define ICE_AQC_REPORT_MODE_M (7 << ICE_AQC_REPORT_MODE_S) +#define ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA 0 +#define ICE_AQC_REPORT_TOPO_CAP_MEDIA BIT(1) +#define ICE_AQC_REPORT_ACTIVE_CFG BIT(2) +#define ICE_AQC_REPORT_DFLT_CFG BIT(3) __le32 reserved1; __le32 addr_high; __le32 addr_low; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index be26775a7dfe..5985a7e5ca8a 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -113,6 +113,9 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) q_vector->v_idx = v_idx; q_vector->tx.itr_setting = ICE_DFLT_TX_ITR; q_vector->rx.itr_setting = ICE_DFLT_RX_ITR; + q_vector->tx.itr_mode = ITR_DYNAMIC; + q_vector->rx.itr_mode = ITR_DYNAMIC; + if (vsi->type == ICE_VSI_VF) goto out; /* only set affinity_mask if the CPU is online */ @@ -740,25 +743,13 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) { ice_cfg_itr_gran(hw); - if (q_vector->num_ring_rx) { - struct ice_ring_container *rc = &q_vector->rx; - - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } + if (q_vector->num_ring_rx) + ice_write_itr(&q_vector->rx, q_vector->rx.itr_setting); - if (q_vector->num_ring_tx) { - struct ice_ring_container *rc = &q_vector->tx; + if (q_vector->num_ring_tx) + ice_write_itr(&q_vector->tx, q_vector->tx.itr_setting); - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } + ice_write_intrl(q_vector, q_vector->intrl); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 54df00ee912b..e93b1e40f627 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -158,6 +158,10 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, return ICE_ERR_PARAM; hw = pi->hw; + if (report_mode == ICE_AQC_REPORT_DFLT_CFG && + !ice_fw_supports_report_dflt_cfg(hw)) + return ICE_ERR_PARAM; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps); if (qual_mods) @@ -191,7 +195,7 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, ice_debug(hw, ICE_DBG_LINK, " module_type[2] = 0x%x\n", pcaps->module_type[2]); - if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) { + if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) { pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low); pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high); memcpy(pi->phy.link_info.module_type, &pcaps->module_type, @@ -717,8 +721,8 @@ static enum ice_status ice_cfg_fw_log(struct ice_hw *hw, bool enable) if (!data) { data = devm_kcalloc(ice_hw_to_dev(hw), - sizeof(*data), ICE_AQC_FW_LOG_ID_MAX, + sizeof(*data), GFP_KERNEL); if (!data) return ICE_ERR_NO_MEMORY; @@ -922,7 +926,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw) /* Initialize port_info struct with PHY capabilities */ status = ice_aq_get_phy_caps(hw->port_info, false, - ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL); + ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, + NULL); devm_kfree(ice_hw_to_dev(hw), pcaps); if (status) dev_warn(ice_hw_to_dev(hw), "Get PHY capabilities failed status = %d, continuing anyway\n", @@ -1293,6 +1298,85 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { DEFINE_MUTEX(ice_global_cfg_lock_sw); /** + * ice_should_retry_sq_send_cmd + * @opcode: AQ opcode + * + * Decide if we should retry the send command routine for the ATQ, depending + * on the opcode. + */ +static bool ice_should_retry_sq_send_cmd(u16 opcode) +{ + switch (opcode) { + case ice_aqc_opc_get_link_topo: + case ice_aqc_opc_lldp_stop: + case ice_aqc_opc_lldp_start: + case ice_aqc_opc_lldp_filter_ctrl: + return true; + } + + return false; +} + +/** + * ice_sq_send_cmd_retry - send command to Control Queue (ATQ) + * @hw: pointer to the HW struct + * @cq: pointer to the specific Control queue + * @desc: prefilled descriptor describing the command + * @buf: buffer to use for indirect commands (or NULL for direct commands) + * @buf_size: size of buffer for indirect commands (or 0 for direct commands) + * @cd: pointer to command details structure + * + * Retry sending the FW Admin Queue command, multiple times, to the FW Admin + * Queue if the EBUSY AQ error is returned. + */ +static enum ice_status +ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_aq_desc *desc, void *buf, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc_cpy; + enum ice_status status; + bool is_cmd_for_retry; + u8 *buf_cpy = NULL; + u8 idx = 0; + u16 opcode; + + opcode = le16_to_cpu(desc->opcode); + is_cmd_for_retry = ice_should_retry_sq_send_cmd(opcode); + memset(&desc_cpy, 0, sizeof(desc_cpy)); + + if (is_cmd_for_retry) { + if (buf) { + buf_cpy = kzalloc(buf_size, GFP_KERNEL); + if (!buf_cpy) + return ICE_ERR_NO_MEMORY; + } + + memcpy(&desc_cpy, desc, sizeof(desc_cpy)); + } + + do { + status = ice_sq_send_cmd(hw, cq, desc, buf, buf_size, cd); + + if (!is_cmd_for_retry || !status || + hw->adminq.sq_last_status != ICE_AQ_RC_EBUSY) + break; + + if (buf_cpy) + memcpy(buf, buf_cpy, buf_size); + + memcpy(desc, &desc_cpy, sizeof(desc_cpy)); + + mdelay(ICE_SQ_SEND_DELAY_TIME_MS); + + } while (++idx < ICE_SQ_SEND_MAX_EXECUTE); + + kfree(buf_cpy); + + return status; +} + +/** * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue * @hw: pointer to the HW struct * @desc: descriptor describing the command @@ -1333,7 +1417,7 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, break; } - status = ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd); + status = ice_sq_send_cmd_retry(hw, &hw->adminq, desc, buf, buf_size, cd); if (lock_acquired) mutex_unlock(&ice_global_cfg_lock_sw); @@ -2655,7 +2739,7 @@ enum ice_status ice_update_link_info(struct ice_port_info *pi) if (!pcaps) return ICE_ERR_NO_MEMORY; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL); devm_kfree(ice_hw_to_dev(hw), pcaps); @@ -2815,8 +2899,8 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) return ICE_ERR_NO_MEMORY; /* Get the current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, - NULL); + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, + pcaps, NULL); if (status) { *aq_failures = ICE_SET_FC_AQ_FAIL_GET; goto out; @@ -2929,17 +3013,6 @@ ice_copy_phy_caps_to_cfg(struct ice_port_info *pi, cfg->link_fec_opt = caps->link_fec_options; cfg->module_compliance_enforcement = caps->module_compliance_enforcement; - - if (ice_fw_supports_link_override(pi->hw)) { - struct ice_link_default_override_tlv tlv; - - if (ice_get_link_default_override(&tlv, pi)) - return; - - if (tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) - cfg->module_compliance_enforcement |= - ICE_LINK_OVERRIDE_STRICT_MODE; - } } /** @@ -2954,16 +3027,21 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, { struct ice_aqc_get_phy_caps_data *pcaps; enum ice_status status; + struct ice_hw *hw; if (!pi || !cfg) return ICE_ERR_BAD_PTR; + hw = pi->hw; + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return ICE_ERR_NO_MEMORY; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, - NULL); + status = ice_aq_get_phy_caps(pi, false, + (ice_fw_supports_report_dflt_cfg(hw) ? + ICE_AQC_REPORT_DFLT_CFG : + ICE_AQC_REPORT_TOPO_CAP_MEDIA), pcaps, NULL); if (status) goto out; @@ -3002,7 +3080,8 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, break; } - if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(pi->hw)) { + if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) && + !ice_fw_supports_report_dflt_cfg(hw)) { struct ice_link_default_override_tlv tlv; if (ice_get_link_default_override(&tlv, pi)) @@ -4412,3 +4491,23 @@ ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add) return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } + +/** + * ice_fw_supports_report_dflt_cfg + * @hw: pointer to the hardware structure + * + * Checks if the firmware supports report default configuration + */ +bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw) +{ + if (hw->api_maj_ver == ICE_FW_API_REPORT_DFLT_CFG_MAJ) { + if (hw->api_min_ver > ICE_FW_API_REPORT_DFLT_CFG_MIN) + return true; + if (hw->api_min_ver == ICE_FW_API_REPORT_DFLT_CFG_MIN && + hw->api_patch >= ICE_FW_API_REPORT_DFLT_CFG_PATCH) + return true; + } else if (hw->api_maj_ver > ICE_FW_API_REPORT_DFLT_CFG_MAJ) { + return true; + } + return false; +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 81fd69cb1485..7a9d2dfb21a2 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -11,6 +11,9 @@ #include "ice_switch.h" #include <linux/avf/virtchnl.h> +#define ICE_SQ_SEND_DELAY_TIME_MS 10 +#define ICE_SQ_SEND_MAX_EXECUTE 3 + enum ice_status ice_init_hw(struct ice_hw *hw); void ice_deinit_hw(struct ice_hw *hw); enum ice_status ice_check_reset(struct ice_hw *hw); @@ -176,4 +179,5 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw); enum ice_status ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add); +bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index b2d8a5932b1d..87b33bdd4960 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -892,7 +892,7 @@ static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq) * ice_sq_send_cmd - send command to Control Queue (ATQ) * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue - * @desc: prefilled descriptor describing the command (non DMA mem) + * @desc: prefilled descriptor describing the command * @buf: buffer to use for indirect commands (or NULL for direct commands) * @buf_size: size of buffer for indirect commands (or 0 for direct commands) * @cd: pointer to command details structure @@ -1097,6 +1097,7 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_rq_event_info *e, u16 *pending) { u16 ntc = cq->rq.next_to_clean; + enum ice_aq_err rq_last_status; enum ice_status ret_code = 0; struct ice_aq_desc *desc; struct ice_dma_mem *bi; @@ -1130,13 +1131,12 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, desc = ICE_CTL_Q_DESC(cq->rq, ntc); desc_idx = ntc; - cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); + rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); flags = le16_to_cpu(desc->flags); if (flags & ICE_AQ_FLAG_ERR) { ret_code = ICE_ERR_AQ_ERROR; ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive Queue Event 0x%04X received with error 0x%X\n", - le16_to_cpu(desc->opcode), - cq->rq_last_status); + le16_to_cpu(desc->opcode), rq_last_status); } memcpy(&e->desc, desc, sizeof(e->desc)); datalen = le16_to_cpu(desc->datalen); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 7d0905f25ddc..fe75871e48ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -31,8 +31,8 @@ enum ice_ctl_q { ICE_CTL_Q_MAILBOX, }; -/* Control Queue timeout settings - max delay 250ms */ -#define ICE_CTL_Q_SQ_CMD_TIMEOUT 2500 /* Count 2500 times */ +/* Control Queue timeout settings - max delay 1s */ +#define ICE_CTL_Q_SQ_CMD_TIMEOUT 10000 /* Count 10000 times */ #define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */ #define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */ @@ -83,7 +83,6 @@ struct ice_rq_event_info { /* Control Queue information */ struct ice_ctl_q_info { enum ice_ctl_q qtype; - enum ice_aq_err rq_last_status; /* last status on receive queue */ struct ice_ctl_q_ring rq; /* receive queue */ struct ice_ctl_q_ring sq; /* send queue */ u32 sq_cmd_timeout; /* send queue cmd write back timeout */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 85c9eccfdae8..849fcf605479 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -738,22 +738,27 @@ ice_aq_get_cee_dcb_cfg(struct ice_hw *hw, /** * ice_cee_to_dcb_cfg * @cee_cfg: pointer to CEE configuration struct - * @dcbcfg: DCB configuration struct + * @pi: port information structure * * Convert CEE configuration from firmware to DCB configuration */ static void ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, - struct ice_dcbx_cfg *dcbcfg) + struct ice_port_info *pi) { u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status); - u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift; - u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); + u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift, j; u8 i, err, sync, oper, app_index, ice_app_sel_type; + u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift; + struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg; u16 ice_app_prot_id_type; - /* CEE PG data to ETS config */ + dcbcfg = &pi->qos_cfg.local_dcbx_cfg; + dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE; + dcbcfg->tlv_status = tlv_status; + + /* CEE PG data */ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc; /* Note that the FW creates the oper_prio_tc nibbles reversed @@ -780,10 +785,16 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, } } - /* CEE PFC data to ETS config */ + /* CEE PFC data */ dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en; dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS; + /* CEE APP TLV data */ + if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING) + cmp_dcbcfg = &pi->qos_cfg.desired_dcbx_cfg; + else + cmp_dcbcfg = &pi->qos_cfg.remote_dcbx_cfg; + app_index = 0; for (i = 0; i < 3; i++) { if (i == 0) { @@ -793,7 +804,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FCOE_M; ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FCOE_S; ice_app_sel_type = ICE_APP_SEL_ETHTYPE; - ice_app_prot_id_type = ICE_APP_PROT_ID_FCOE; + ice_app_prot_id_type = ETH_P_FCOE; } else if (i == 1) { /* iSCSI APP */ ice_aqc_cee_status_mask = ICE_AQC_CEE_ISCSI_STATUS_M; @@ -801,7 +812,19 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_ISCSI_M; ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S; ice_app_sel_type = ICE_APP_SEL_TCPIP; - ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI; + ice_app_prot_id_type = ISCSI_LISTEN_PORT; + + for (j = 0; j < cmp_dcbcfg->numapps; j++) { + u16 prot_id = cmp_dcbcfg->app[j].prot_id; + u8 sel = cmp_dcbcfg->app[j].selector; + + if (sel == ICE_APP_SEL_TCPIP && + (prot_id == ISCSI_LISTEN_PORT || + prot_id == ICE_APP_PROT_ID_ISCSI_860)) { + ice_app_prot_id_type = prot_id; + break; + } + } } else { /* FIP APP */ ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M; @@ -809,7 +832,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FIP_M; ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FIP_S; ice_app_sel_type = ICE_APP_SEL_ETHTYPE; - ice_app_prot_id_type = ICE_APP_PROT_ID_FIP; + ice_app_prot_id_type = ETH_P_FIP; } status = (tlv_status & ice_aqc_cee_status_mask) >> @@ -892,11 +915,8 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi) ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL); if (!ret) { /* CEE mode */ - dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; - dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_CEE; - dcbx_cfg->tlv_status = le32_to_cpu(cee_cfg.tlv_status); - ice_cee_to_dcb_cfg(&cee_cfg, dcbx_cfg); ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE); + ice_cee_to_dcb_cfg(&cee_cfg, pi); } else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) { /* CEE mode not enabled try querying IEEE data */ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 1e8f71ffc8ce..df02cffdf209 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -563,7 +563,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) dcbcfg->numapps = 1; dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE; dcbcfg->app[0].priority = 3; - dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE; + dcbcfg->app[0].prot_id = ETH_P_FCOE; ret = ice_pf_dcb_cfg(pf, dcbcfg, locked); kfree(dcbcfg); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index 468a63f7eff9..4180f1f35fb8 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -18,12 +18,10 @@ static void ice_dcbnl_devreset(struct net_device *netdev) while (ice_is_reset_in_progress(pf->state)) usleep_range(1000, 2000); - set_bit(__ICE_DCBNL_DEVRESET, pf->state); dev_close(netdev); netdev_state_change(netdev); dev_open(netdev, NULL); netdev_state_change(netdev); - clear_bit(__ICE_DCBNL_DEVRESET, pf->state); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 15152e63f204..d9ddd0bcf65f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -60,7 +60,6 @@ static const struct ice_stats ice_gstrings_vsi_stats[] = { ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed), ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), - ICE_VSI_STAT("rx_gro_dropped", rx_gro_dropped), ICE_VSI_STAT("tx_errors", eth_stats.tx_errors), ICE_VSI_STAT("tx_linearize", tx_linearize), ICE_VSI_STAT("tx_busy", tx_busy), @@ -807,7 +806,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, if (eth_test->flags == ETH_TEST_FL_OFFLINE) { netdev_info(netdev, "offline testing starting\n"); - set_bit(__ICE_TESTING, pf->state); + set_bit(ICE_TESTING, pf->state); if (ice_active_vfs(pf)) { dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); @@ -817,7 +816,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, data[ICE_ETH_TEST_LOOP] = 1; data[ICE_ETH_TEST_LINK] = 1; eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__ICE_TESTING, pf->state); + clear_bit(ICE_TESTING, pf->state); goto skip_ol_tests; } /* If the device is online then take it offline */ @@ -838,7 +837,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, data[ICE_ETH_TEST_REG]) eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__ICE_TESTING, pf->state); + clear_bit(ICE_TESTING, pf->state); if (if_running) { int status = ice_open(netdev); @@ -1060,7 +1059,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) if (!caps) return -ENOMEM; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL); if (status) { err = -EAGAIN; @@ -1095,24 +1094,15 @@ static int ice_nway_reset(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - struct ice_port_info *pi; - enum ice_status status; + int err; - pi = vsi->port_info; /* If VSI state is up, then restart autoneg with link up */ - if (!test_bit(__ICE_DOWN, vsi->back->state)) - status = ice_aq_set_link_restart_an(pi, true, NULL); + if (!test_bit(ICE_DOWN, vsi->back->state)) + err = ice_set_link(vsi, true); else - status = ice_aq_set_link_restart_an(pi, false, NULL); - - if (status) { - netdev_info(netdev, "link restart failed, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(pi->hw->adminq.sq_last_status)); - return -EIO; - } + err = ice_set_link(vsi, false); - return 0; + return err; } /** @@ -1454,8 +1444,8 @@ void ice_mask_min_supported_speeds(u64 phy_types_high, u64 *phy_types_low) do { \ if (req_speeds & (aq_link_speed) || \ (!req_speeds && \ - (adv_phy_type_lo & phy_type_mask_lo || \ - adv_phy_type_hi & phy_type_mask_hi))) \ + (advert_phy_type_lo & phy_type_mask_lo || \ + advert_phy_type_hi & phy_type_mask_hi))) \ ethtool_link_ksettings_add_link_mode(ks, advertising,\ ethtool_link_mode); \ } while (0) @@ -1472,10 +1462,10 @@ ice_phy_type_to_ethtool(struct net_device *netdev, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + u64 advert_phy_type_lo = 0; + u64 advert_phy_type_hi = 0; u64 phy_type_mask_lo = 0; u64 phy_type_mask_hi = 0; - u64 adv_phy_type_lo = 0; - u64 adv_phy_type_hi = 0; u64 phy_types_high = 0; u64 phy_types_low = 0; u16 req_speeds; @@ -1493,28 +1483,35 @@ ice_phy_type_to_ethtool(struct net_device *netdev, * requested by user. */ if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) { - struct ice_link_default_override_tlv *ldo; - - ldo = &pf->link_dflt_override; phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo); phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi); ice_mask_min_supported_speeds(phy_types_high, &phy_types_low); - - /* If override enabled and PHY mask set, then - * Advertising link mode is the intersection of the PHY - * types without media and the override PHY mask. + /* determine advertised modes based on link override only + * if it's supported and if the FW doesn't abstract the + * driver from having to account for link overrides */ - if (ldo->options & ICE_LINK_OVERRIDE_EN && - (ldo->phy_type_low || ldo->phy_type_high)) { - adv_phy_type_lo = - le64_to_cpu(pf->nvm_phy_type_lo) & - ldo->phy_type_low; - adv_phy_type_hi = - le64_to_cpu(pf->nvm_phy_type_hi) & - ldo->phy_type_high; + if (ice_fw_supports_link_override(&pf->hw) && + !ice_fw_supports_report_dflt_cfg(&pf->hw)) { + struct ice_link_default_override_tlv *ldo; + + ldo = &pf->link_dflt_override; + /* If override enabled and PHY mask set, then + * Advertising link mode is the intersection of the PHY + * types without media and the override PHY mask. + */ + if (ldo->options & ICE_LINK_OVERRIDE_EN && + (ldo->phy_type_low || ldo->phy_type_high)) { + advert_phy_type_lo = + le64_to_cpu(pf->nvm_phy_type_lo) & + ldo->phy_type_low; + advert_phy_type_hi = + le64_to_cpu(pf->nvm_phy_type_hi) & + ldo->phy_type_high; + } } } else { + /* strict mode */ phy_types_low = vsi->port_info->phy.phy_type_low; phy_types_high = vsi->port_info->phy.phy_type_high; } @@ -1522,9 +1519,9 @@ ice_phy_type_to_ethtool(struct net_device *netdev, /* If Advertising link mode PHY type is not using override PHY type, * then use PHY type with media. */ - if (!adv_phy_type_lo && !adv_phy_type_hi) { - adv_phy_type_lo = vsi->port_info->phy.phy_type_low; - adv_phy_type_hi = vsi->port_info->phy.phy_type_high; + if (!advert_phy_type_lo && !advert_phy_type_hi) { + advert_phy_type_lo = vsi->port_info->phy.phy_type_low; + advert_phy_type_hi = vsi->port_info->phy.phy_type_high; } ethtool_link_ksettings_zero_link_mode(ks, supported); @@ -2000,7 +1997,7 @@ ice_get_link_ksettings(struct net_device *netdev, return -ENOMEM; status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_SW_CFG, caps, NULL); + ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL); if (status) { err = -EIO; goto done; @@ -2037,7 +2034,7 @@ ice_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_TOPO_CAP, caps, NULL); + ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL); if (status) { err = -EIO; goto done; @@ -2204,13 +2201,14 @@ ice_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *ks) { struct ice_netdev_priv *np = netdev_priv(netdev); - struct ethtool_link_ksettings safe_ks, copy_ks; - struct ice_aqc_get_phy_caps_data *abilities; u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT; - u16 adv_link_speed, curr_link_speed, idx; + struct ethtool_link_ksettings copy_ks = *ks; + struct ethtool_link_ksettings safe_ks = {}; + struct ice_aqc_get_phy_caps_data *phy_caps; struct ice_aqc_set_phy_cfg_data config; + u16 adv_link_speed, curr_link_speed; struct ice_pf *pf = np->vsi->back; - struct ice_port_info *p; + struct ice_port_info *pi; u8 autoneg_changed = 0; enum ice_status status; u64 phy_type_high = 0; @@ -2218,46 +2216,37 @@ ice_set_link_ksettings(struct net_device *netdev, int err = 0; bool linkup; - p = np->vsi->port_info; - - if (!p) - return -EOPNOTSUPP; + pi = np->vsi->port_info; - /* Check if this is LAN VSI */ - ice_for_each_vsi(pf, idx) - if (pf->vsi[idx]->type == ICE_VSI_PF) { - if (np->vsi != pf->vsi[idx]) - return -EOPNOTSUPP; - break; - } + if (!pi) + return -EIO; - if (p->phy.media_type != ICE_MEDIA_BASET && - p->phy.media_type != ICE_MEDIA_FIBER && - p->phy.media_type != ICE_MEDIA_BACKPLANE && - p->phy.media_type != ICE_MEDIA_DA && - p->phy.link_info.link_info & ICE_AQ_LINK_UP) + if (pi->phy.media_type != ICE_MEDIA_BASET && + pi->phy.media_type != ICE_MEDIA_FIBER && + pi->phy.media_type != ICE_MEDIA_BACKPLANE && + pi->phy.media_type != ICE_MEDIA_DA && + pi->phy.link_info.link_info & ICE_AQ_LINK_UP) return -EOPNOTSUPP; - abilities = kzalloc(sizeof(*abilities), GFP_KERNEL); - if (!abilities) + phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL); + if (!phy_caps) return -ENOMEM; /* Get the PHY capabilities based on media */ - status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_TOPO_CAP, - abilities, NULL); + if (ice_fw_supports_report_dflt_cfg(pi->hw)) + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, + phy_caps, NULL); + else + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + phy_caps, NULL); if (status) { - err = -EAGAIN; + err = -EIO; goto done; } - /* copy the ksettings to copy_ks to avoid modifying the original */ - memcpy(©_ks, ks, sizeof(copy_ks)); - /* save autoneg out of ksettings */ autoneg = copy_ks.base.autoneg; - memset(&safe_ks, 0, sizeof(safe_ks)); - /* Get link modes supported by hardware.*/ ice_phy_type_to_ethtool(netdev, &safe_ks); @@ -2269,7 +2258,7 @@ ice_set_link_ksettings(struct net_device *netdev, __ETHTOOL_LINK_MODE_MASK_NBITS)) { if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n"); - err = -EINVAL; + err = -EOPNOTSUPP; goto done; } @@ -2293,7 +2282,7 @@ ice_set_link_ksettings(struct net_device *netdev, goto done; } - while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) { err = -EBUSY; @@ -2306,26 +2295,26 @@ ice_set_link_ksettings(struct net_device *netdev, * configuration is initialized during probe from PHY capabilities * software mode, and updated on set PHY configuration. */ - memcpy(&config, &p->phy.curr_user_phy_cfg, sizeof(config)); + config = pi->phy.curr_user_phy_cfg; config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; /* Check autoneg */ - err = ice_setup_autoneg(p, &safe_ks, &config, autoneg, &autoneg_changed, + err = ice_setup_autoneg(pi, &safe_ks, &config, autoneg, &autoneg_changed, netdev); if (err) goto done; /* Call to get the current link speed */ - p->phy.get_link_info = true; - status = ice_get_link_status(p, &linkup); + pi->phy.get_link_info = true; + status = ice_get_link_status(pi, &linkup); if (status) { - err = -EAGAIN; + err = -EIO; goto done; } - curr_link_speed = p->phy.link_info.link_speed; + curr_link_speed = pi->phy.link_info.link_speed; adv_link_speed = ice_ksettings_find_adv_link_speed(ks); /* If speed didn't get set, set it to what it currently is. @@ -2344,7 +2333,7 @@ ice_set_link_ksettings(struct net_device *netdev, } /* save the requested speeds */ - p->phy.link_info.req_speeds = adv_link_speed; + pi->phy.link_info.req_speeds = adv_link_speed; /* set link and auto negotiation so changes take effect */ config.caps |= ICE_AQ_PHY_ENA_LINK; @@ -2352,7 +2341,7 @@ ice_set_link_ksettings(struct net_device *netdev, /* check if there is a PHY type for the requested advertised speed */ if (!(phy_type_low || phy_type_high)) { netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n"); - err = -EAGAIN; + err = -EOPNOTSUPP; goto done; } @@ -2360,9 +2349,9 @@ ice_set_link_ksettings(struct net_device *netdev, * for set PHY configuration */ config.phy_type_high = cpu_to_le64(phy_type_high) & - abilities->phy_type_high; + phy_caps->phy_type_high; config.phy_type_low = cpu_to_le64(phy_type_low) & - abilities->phy_type_low; + phy_caps->phy_type_low; if (!(config.phy_type_high || config.phy_type_low)) { /* If there is no intersection and lenient mode is enabled, then @@ -2376,13 +2365,13 @@ ice_set_link_ksettings(struct net_device *netdev, pf->nvm_phy_type_lo; } else { netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n"); - err = -EAGAIN; + err = -EOPNOTSUPP; goto done; } } /* If link is up put link down */ - if (p->phy.link_info.link_info & ICE_AQ_LINK_UP) { + if (pi->phy.link_info.link_info & ICE_AQ_LINK_UP) { /* Tell the OS link is going down, the link will go * back up when fw says it is ready asynchronously */ @@ -2392,18 +2381,18 @@ ice_set_link_ksettings(struct net_device *netdev, } /* make the aq call */ - status = ice_aq_set_phy_cfg(&pf->hw, p, &config, NULL); + status = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL); if (status) { netdev_info(netdev, "Set phy config failed,\n"); - err = -EAGAIN; + err = -EIO; goto done; } /* Save speed request */ - p->phy.curr_user_speed_req = adv_link_speed; + pi->phy.curr_user_speed_req = adv_link_speed; done: - kfree(abilities); - clear_bit(__ICE_CFG_BUSY, pf->state); + kfree(phy_caps); + clear_bit(ICE_CFG_BUSY, pf->state); return err; } @@ -2759,7 +2748,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) if (ice_xsk_any_rx_ring_ena(vsi)) return -EBUSY; - while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) return -EBUSY; @@ -2938,7 +2927,7 @@ free_tx: } done: - clear_bit(__ICE_CFG_BUSY, pf->state); + clear_bit(ICE_CFG_BUSY, pf->state); return err; } @@ -2972,7 +2961,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) return; /* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) goto out; @@ -3039,7 +3028,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) return -ENOMEM; /* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) { kfree(pcaps); @@ -3057,7 +3046,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) } /* If we have link and don't have autoneg */ - if (!test_bit(__ICE_DOWN, pf->state) && + if (!test_bit(ICE_DOWN, pf->state) && !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) { /* Send message that it might not necessarily work*/ netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n"); @@ -3453,7 +3442,7 @@ static void ice_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) netdev_warn(netdev, "Wake on LAN is not supported on this interface!\n"); /* Get WoL settings based on the HW capability */ - if (ice_is_wol_supported(pf)) { + if (ice_is_wol_supported(&pf->hw)) { wol->supported = WAKE_MAGIC; wol->wolopts = pf->wol_ena ? WAKE_MAGIC : 0; } else { @@ -3473,7 +3462,7 @@ static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(pf)) + if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(&pf->hw)) return -EOPNOTSUPP; /* only magic packet is supported */ @@ -3521,13 +3510,13 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, switch (c_type) { case ICE_RX_CONTAINER: - ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting); - ec->rx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; + ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc); + ec->rx_coalesce_usecs = rc->itr_setting; ec->rx_coalesce_usecs_high = rc->ring->q_vector->intrl; break; case ICE_TX_CONTAINER: - ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting); - ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; + ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc); + ec->tx_coalesce_usecs = rc->itr_setting; break; default: dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type); @@ -3645,11 +3634,16 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, ICE_MAX_INTRL); return -EINVAL; } + if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl && + (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) { + netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n", + c_type_str); + return -EINVAL; + } if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx), - ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high, - pf->hw.intrl_gran)); + ice_write_intrl(rc->ring->q_vector, + ec->rx_coalesce_usecs_high); } use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; @@ -3667,7 +3661,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, return -EINVAL; } - itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC; + itr_setting = rc->itr_setting; if (coalesce_usecs != itr_setting && use_adaptive_coalesce) { netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n", c_type_str, c_type_str); @@ -3681,12 +3675,18 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, } if (use_adaptive_coalesce) { - rc->itr_setting |= ICE_ITR_DYNAMIC; + rc->itr_mode = ITR_DYNAMIC; } else { - /* save the user set usecs */ + rc->itr_mode = ITR_STATIC; + /* store user facing value how it was set */ rc->itr_setting = coalesce_usecs; - /* device ITR granularity is in 2 usec increments */ - rc->target_itr = ITR_REG_ALIGN(rc->itr_setting); + /* write the change to the register */ + ice_write_itr(rc, coalesce_usecs); + /* force writes to take effect immediately, the flush shouldn't + * be done in the functions above because the intent is for + * them to do lazy writes. + */ + ice_flush(&pf->hw); } return 0; @@ -3748,8 +3748,6 @@ ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting, if (use_adaptive_coalesce) return; - itr_setting = ITR_TO_REG(itr_setting); - if (itr_setting != coalesce_usecs && (coalesce_usecs % 2)) netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n", c_type_str, coalesce_usecs, c_type_str, @@ -3804,7 +3802,6 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, return -EINVAL; set_complete: - return 0; } @@ -3917,30 +3914,33 @@ ice_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { struct ice_netdev_priv *np = netdev_priv(netdev); +#define SFF_READ_BLOCK_SIZE 8 + u8 value[SFF_READ_BLOCK_SIZE] = { 0 }; u8 addr = ICE_I2C_EEPROM_DEV_ADDR; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; bool is_sfp = false; - unsigned int i; + unsigned int i, j; u16 offset = 0; - u8 value = 0; u8 page = 0; if (!ee || !ee->len || !data) return -EINVAL; - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, &value, 1, 0, + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, value, 1, 0, NULL); if (status) return -EIO; - if (value == ICE_MODULE_TYPE_SFP) + if (value[0] == ICE_MODULE_TYPE_SFP) is_sfp = true; - for (i = 0; i < ee->len; i++) { + memset(data, 0, ee->len); + for (i = 0; i < ee->len; i += SFF_READ_BLOCK_SIZE) { offset = i + ee->offset; + page = 0; /* Check if we need to access the other memory page */ if (is_sfp) { @@ -3956,11 +3956,37 @@ ice_get_module_eeprom(struct net_device *netdev, } } - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp, - &value, 1, 0, NULL); - if (status) - value = 0; - data[i] = value; + /* Bit 2 of EEPROM address 0x02 declares upper + * pages are disabled on QSFP modules. + * SFP modules only ever use page 0. + */ + if (page == 0 || !(data[0x2] & 0x4)) { + /* If i2c bus is busy due to slow page change or + * link management access, call can fail. This is normal. + * So we retry this a few times. + */ + for (j = 0; j < 4; j++) { + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, + !is_sfp, value, + SFF_READ_BLOCK_SIZE, + 0, NULL); + netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n", + addr, offset, page, is_sfp, + value[0], value[1], value[2], value[3], + value[4], value[5], value[6], value[7], + status); + if (status) { + usleep_range(1500, 2500); + memset(value, 0, SFF_READ_BLOCK_SIZE); + continue; + } + break; + } + + /* Make sure we have enough room for the new block */ + if ((i + SFF_READ_BLOCK_SIZE) < ee->len) + memcpy(data + i, value, SFF_READ_BLOCK_SIZE); + } } return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 440964defa4a..16de603b280c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1452,7 +1452,7 @@ int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) return -EBUSY; } - if (test_bit(__ICE_FD_FLUSH_REQ, pf->state)) + if (test_bit(ICE_FD_FLUSH_REQ, pf->state)) return -EBUSY; mutex_lock(&hw->fdir_fltr_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 4b83960876f4..06ac9badee77 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -334,6 +334,7 @@ ice_boost_tcam_handler(u32 sect_type, void *section, u32 index, u32 *offset) if (sect_type != ICE_SID_RXPARSER_BOOST_TCAM) return NULL; + /* cppcheck-suppress nullPointer */ if (index > ICE_MAX_BST_TCAMS_IN_BUF) return NULL; @@ -404,6 +405,7 @@ ice_label_enum_handler(u32 __always_unused sect_type, void *section, u32 index, if (!section) return NULL; + /* cppcheck-suppress nullPointer */ if (index > ICE_MAX_LABELS_IN_BUF) return NULL; @@ -2067,6 +2069,7 @@ ice_match_prop_lst(struct list_head *list1, struct list_head *list2) count++; list_for_each_entry(tmp2, list2, list) chk_count++; + /* cppcheck-suppress knownConditionTrueFalse */ if (!count || count != chk_count) return false; diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 67b5b9b9d009..de38a0fc9665 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -130,6 +130,7 @@ #define GLINT_DYN_CTL_ITR_INDX_M ICE_M(0x3, 3) #define GLINT_DYN_CTL_INTERVAL_S 5 #define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5) +#define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24) #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) #define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30) #define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 6041ca2830de..82e2ce23df3d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -158,6 +158,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) if (vsi->type == ICE_VSI_VF) vsi->vf_id = vf_id; + else + vsi->vf_id = ICE_INVAL_VFID; switch (vsi->type) { case ICE_VSI_PF: @@ -385,6 +387,8 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) if (!q_vector->tx.ring && !q_vector->rx.ring) return IRQ_HANDLED; + q_vector->total_events++; + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -1309,14 +1313,13 @@ err_out: * LUT, while in the event of enable request for RSS, it will reconfigure RSS * LUT. */ -int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) +void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) { - int err = 0; u8 *lut; lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) - return -ENOMEM; + return; if (ena) { if (vsi->rss_lut_user) @@ -1326,9 +1329,8 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) vsi->rss_size); } - err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + ice_set_rss_lut(vsi, lut, vsi->rss_table_size); kfree(lut); - return err; } /** @@ -1502,13 +1504,13 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi) */ bool ice_pf_state_is_nominal(struct ice_pf *pf) { - DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 }; + DECLARE_BITMAP(check_bits, ICE_STATE_NBITS) = { 0 }; if (!pf) return false; - bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS); - if (bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS)) + bitmap_set(check_bits, 0, ICE_STATE_NOMINAL_CHECK_BITS); + if (bitmap_intersects(pf->state, check_bits, ICE_STATE_NBITS)) return false; return true; @@ -1773,7 +1775,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) * This function converts a decimal interrupt rate limit in usecs to the format * expected by firmware. */ -u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) +static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) { u32 val = intrl / gran; @@ -1783,6 +1785,51 @@ u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) } /** + * ice_write_intrl - write throttle rate limit to interrupt specific register + * @q_vector: pointer to interrupt specific structure + * @intrl: throttle rate limit in microseconds to write + */ +void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl) +{ + struct ice_hw *hw = &q_vector->vsi->back->hw; + + wr32(hw, GLINT_RATE(q_vector->reg_idx), + ice_intrl_usec_to_reg(intrl, ICE_INTRL_GRAN_ABOVE_25)); +} + +/** + * __ice_write_itr - write throttle rate to register + * @q_vector: pointer to interrupt data structure + * @rc: pointer to ring container + * @itr: throttle rate in microseconds to write + */ +static void __ice_write_itr(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, u16 itr) +{ + struct ice_hw *hw = &q_vector->vsi->back->hw; + + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(itr) >> ICE_ITR_GRAN_S); +} + +/** + * ice_write_itr - write throttle rate to queue specific register + * @rc: pointer to ring container + * @itr: throttle rate in microseconds to write + */ +void ice_write_itr(struct ice_ring_container *rc, u16 itr) +{ + struct ice_q_vector *q_vector; + + if (!rc->ring) + return; + + q_vector = rc->ring->q_vector; + + __ice_write_itr(q_vector, rc, itr); +} + +/** * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured * @@ -1802,9 +1849,6 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) ice_cfg_itr(hw, q_vector); - wr32(hw, GLINT_RATE(reg_idx), - ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); - /* Both Transmit Queue Interrupt Cause Control register * and Receive Queue Interrupt Cause control register * expects MSIX_INDX field to be the vector index @@ -2492,11 +2536,10 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; - u16 reg_idx = q_vector->reg_idx; - wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0); - wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0); + ice_write_intrl(q_vector, 0); for (q = 0; q < q_vector->num_ring_tx; q++) { + ice_write_itr(&q_vector->tx, 0); wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); if (ice_is_xdp_ena_vsi(vsi)) { u32 xdp_txq = txq + vsi->num_xdp_txq; @@ -2507,6 +2550,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) } for (q = 0; q < q_vector->num_ring_rx; q++) { + ice_write_itr(&q_vector->rx, 0); wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0); rxq++; } @@ -2620,7 +2664,7 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked) if (!locked) rtnl_lock(); - err = ice_open(vsi->netdev); + err = ice_open_internal(vsi->netdev); if (!locked) rtnl_unlock(); @@ -2649,7 +2693,7 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) if (!locked) rtnl_lock(); - ice_stop(vsi->netdev); + ice_vsi_close(vsi); if (!locked) rtnl_unlock(); @@ -2752,11 +2796,14 @@ int ice_vsi_release(struct ice_vsi *vsi) * PF that is running the work queue items currently. This is done to * avoid check_flush_dependency() warning on this wq */ - if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) { + if (vsi->netdev && !ice_is_reset_in_progress(pf->state) && + (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) { unregister_netdev(vsi->netdev); - ice_devlink_destroy_port(vsi); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); } + ice_devlink_destroy_port(vsi); + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); @@ -2811,10 +2858,16 @@ int ice_vsi_release(struct ice_vsi *vsi) ice_vsi_delete(vsi); ice_vsi_free_q_vectors(vsi); - /* make sure unregister_netdev() was called by checking __ICE_DOWN */ - if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) { - free_netdev(vsi->netdev); - vsi->netdev = NULL; + if (vsi->netdev) { + if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) { + unregister_netdev(vsi->netdev); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); + } + if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) { + free_netdev(vsi->netdev); + vsi->netdev = NULL; + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); + } } if (vsi->type == ICE_VSI_VF && @@ -2835,47 +2888,6 @@ int ice_vsi_release(struct ice_vsi *vsi) } /** - * ice_vsi_rebuild_update_coalesce_intrl - set interrupt rate limit for a q_vector - * @q_vector: pointer to q_vector which is being updated - * @stored_intrl_setting: original INTRL setting - * - * Set coalesce param in q_vector and update these parameters in HW. - */ -static void -ice_vsi_rebuild_update_coalesce_intrl(struct ice_q_vector *q_vector, - u16 stored_intrl_setting) -{ - struct ice_hw *hw = &q_vector->vsi->back->hw; - - q_vector->intrl = stored_intrl_setting; - wr32(hw, GLINT_RATE(q_vector->reg_idx), - ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); -} - -/** - * ice_vsi_rebuild_update_coalesce_itr - set coalesce for a q_vector - * @q_vector: pointer to q_vector which is being updated - * @rc: pointer to ring container - * @stored_itr_setting: original ITR setting - * - * Set coalesce param in q_vector and update these parameters in HW. - */ -static void -ice_vsi_rebuild_update_coalesce_itr(struct ice_q_vector *q_vector, - struct ice_ring_container *rc, - u16 stored_itr_setting) -{ - struct ice_hw *hw = &q_vector->vsi->back->hw; - - rc->itr_setting = stored_itr_setting; - - /* dynamic ITR values will be updated during Tx/Rx */ - if (!ITR_IS_DYNAMIC(rc->itr_setting)) - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->itr_setting) >> ICE_ITR_GRAN_S); -} - -/** * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors * @vsi: VSI connected with q_vectors * @coalesce: array of struct with stored coalesce @@ -2918,6 +2930,7 @@ static void ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, struct ice_coalesce_stored *coalesce, int size) { + struct ice_ring_container *rc; int i; if ((size && !coalesce) || !vsi) @@ -2940,41 +2953,51 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, * rings is less than are allocated (this means the number of * rings increased from previously), then write out the * values in the first element + * + * Also, always write the ITR, even if in ITR_IS_DYNAMIC + * as there is no harm because the dynamic algorithm + * will just overwrite. */ - if (i < vsi->alloc_rxq && coalesce[i].rx_valid) - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->rx, - coalesce[i].itr_rx); - else if (i < vsi->alloc_rxq) - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->rx, - coalesce[0].itr_rx); - - if (i < vsi->alloc_txq && coalesce[i].tx_valid) - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->tx, - coalesce[i].itr_tx); - else if (i < vsi->alloc_txq) - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->tx, - coalesce[0].itr_tx); - - ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], - coalesce[i].intrl); + if (i < vsi->alloc_rxq && coalesce[i].rx_valid) { + rc = &vsi->q_vectors[i]->rx; + rc->itr_setting = coalesce[i].itr_rx; + ice_write_itr(rc, rc->itr_setting); + } else if (i < vsi->alloc_rxq) { + rc = &vsi->q_vectors[i]->rx; + rc->itr_setting = coalesce[0].itr_rx; + ice_write_itr(rc, rc->itr_setting); + } + + if (i < vsi->alloc_txq && coalesce[i].tx_valid) { + rc = &vsi->q_vectors[i]->tx; + rc->itr_setting = coalesce[i].itr_tx; + ice_write_itr(rc, rc->itr_setting); + } else if (i < vsi->alloc_txq) { + rc = &vsi->q_vectors[i]->tx; + rc->itr_setting = coalesce[0].itr_tx; + ice_write_itr(rc, rc->itr_setting); + } + + vsi->q_vectors[i]->intrl = coalesce[i].intrl; + ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl); } /* the number of queue vectors increased so write whatever is in * the first element */ for (; i < vsi->num_q_vectors; i++) { - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->tx, - coalesce[0].itr_tx); - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->rx, - coalesce[0].itr_rx); - ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], - coalesce[0].intrl); + /* transmit */ + rc = &vsi->q_vectors[i]->tx; + rc->itr_setting = coalesce[0].itr_tx; + ice_write_itr(rc, rc->itr_setting); + + /* receive */ + rc = &vsi->q_vectors[i]->rx; + rc->itr_setting = coalesce[0].itr_rx; + ice_write_itr(rc, rc->itr_setting); + + vsi->q_vectors[i]->intrl = coalesce[0].intrl; + ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl); } } @@ -2991,6 +3014,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) struct ice_coalesce_stored *coalesce; int prev_num_q_vectors = 0; struct ice_vf *vf = NULL; + enum ice_vsi_type vtype; enum ice_status status; struct ice_pf *pf; int ret, i; @@ -2999,7 +3023,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) return -EINVAL; pf = vsi->back; - if (vsi->type == ICE_VSI_VF) + vtype = vsi->type; + if (vtype == ICE_VSI_VF) vf = &pf->vf[vsi->vf_id]; coalesce = kcalloc(vsi->num_q_vectors, @@ -3017,7 +3042,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) * many interrupts each VF needs. SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type != ICE_VSI_VF) { + if (vtype != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; @@ -3032,7 +3057,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_put_qs(vsi); ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); - if (vsi->type == ICE_VSI_VF) + if (vtype == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf->vf_id); else ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); @@ -3051,7 +3076,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret < 0) goto err_vsi; - switch (vsi->type) { + switch (vtype) { case ICE_VSI_CTRL: case ICE_VSI_PF: ret = ice_vsi_alloc_q_vectors(vsi); @@ -3078,7 +3103,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) goto err_vectors; } /* ICE_VSI_CTRL does not need RSS so skip RSS processing */ - if (vsi->type != ICE_VSI_CTRL) + if (vtype != ICE_VSI_CTRL) /* Do not exit if configuring RSS had an issue, at * least receive traffic on first queue. Hence no * need to capture return value @@ -3140,7 +3165,7 @@ err_rings: } err_vsi: ice_vsi_clear(vsi); - set_bit(__ICE_RESET_FAILED, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); kfree(coalesce); return ret; } @@ -3151,11 +3176,10 @@ err_vsi: */ bool ice_is_reset_in_progress(unsigned long *state) { - return test_bit(__ICE_RESET_OICR_RECV, state) || - test_bit(__ICE_DCBNL_DEVRESET, state) || - test_bit(__ICE_PFR_REQ, state) || - test_bit(__ICE_CORER_REQ, state) || - test_bit(__ICE_GLOBR_REQ, state); + return test_bit(ICE_RESET_OICR_RECV, state) || + test_bit(ICE_PFR_REQ, state) || + test_bit(ICE_CORER_REQ, state) || + test_bit(ICE_GLOBR_REQ, state); } #ifdef CONFIG_DCB @@ -3243,20 +3267,15 @@ out: /** * ice_update_ring_stats - Update ring statistics * @ring: ring to update - * @cont: used to increment per-vector counters * @pkts: number of processed packets * @bytes: number of processed bytes * * This function assumes that caller has acquired a u64_stats_sync lock. */ -static void -ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont, - u64 pkts, u64 bytes) +static void ice_update_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes) { ring->stats.bytes += bytes; ring->stats.pkts += pkts; - cont->total_bytes += bytes; - cont->total_pkts += pkts; } /** @@ -3268,7 +3287,7 @@ ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont, void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&tx_ring->syncp); - ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes); + ice_update_ring_stats(tx_ring, pkts, bytes); u64_stats_update_end(&tx_ring->syncp); } @@ -3281,7 +3300,7 @@ void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&rx_ring->syncp); - ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes); + ice_update_ring_stats(rx_ring, pkts, bytes); u64_stats_update_end(&rx_ring->syncp); } @@ -3423,3 +3442,40 @@ int ice_clear_dflt_vsi(struct ice_sw *sw) return 0; } + +/** + * ice_set_link - turn on/off physical link + * @vsi: VSI to modify physical link on + * @ena: turn on/off physical link + */ +int ice_set_link(struct ice_vsi *vsi, bool ena) +{ + struct device *dev = ice_pf_to_dev(vsi->back); + struct ice_port_info *pi = vsi->port_info; + struct ice_hw *hw = pi->hw; + enum ice_status status; + + if (vsi->type != ICE_VSI_PF) + return -EINVAL; + + status = ice_aq_set_link_restart_an(pi, ena, NULL); + + /* if link is owned by manageability, FW will return ICE_AQ_RC_EMODE. + * this is not a fatal error, so print a warning message and return + * a success code. Return an error if FW returns an error code other + * than ICE_AQ_RC_EMODE + */ + if (status == ICE_ERR_AQ_ERROR) { + if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE) + dev_warn(dev, "can't set link to %s, err %s aq_err %s. not fatal, continuing\n", + (ena ? "ON" : "OFF"), ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + } else if (status) { + dev_err(dev, "can't set link to %s, err %s aq_err %s\n", + (ena ? "ON" : "OFF"), ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; + } + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 3da17895a2b1..511c2316c40c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -45,6 +45,8 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); +int ice_set_link(struct ice_vsi *vsi, bool ena); + #ifdef CONFIG_DCB int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc); #endif /* CONFIG_DCB */ @@ -83,7 +85,7 @@ void ice_vsi_free_rx_rings(struct ice_vsi *vsi); void ice_vsi_free_tx_rings(struct ice_vsi *vsi); -int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); +void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); @@ -93,7 +95,8 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi); int ice_status_to_errno(enum ice_status err); -u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran); +void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); +void ice_write_itr(struct ice_ring_container *rc, u16 itr); enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b3c1cadecf21..6dbaa9099fdf 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -257,7 +257,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) if (!vsi->netdev) return -EINVAL; - while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) + while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) usleep_range(1000, 2000); changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; @@ -307,7 +307,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) * space reserved for promiscuous filters. */ if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC && - !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC, + !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC, vsi->state)) { promisc_forced_on = true; netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n", @@ -391,7 +391,7 @@ out: set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); exit: - clear_bit(__ICE_CFG_BUSY, vsi->state); + clear_bit(ICE_CFG_BUSY, vsi->state); return err; } @@ -436,7 +436,6 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++) pf->vf_agg_node[node].num_vsis = 0; - } /** @@ -452,7 +451,7 @@ ice_prepare_for_reset(struct ice_pf *pf) unsigned int i; /* already prepared for reset */ - if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) + if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; /* Notify VFs of impending reset */ @@ -473,7 +472,7 @@ ice_prepare_for_reset(struct ice_pf *pf) ice_shutdown_all_ctrlq(hw); - set_bit(__ICE_PREPARED_FOR_RESET, pf->state); + set_bit(ICE_PREPARED_FOR_RESET, pf->state); } /** @@ -494,12 +493,12 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) /* trigger the reset */ if (ice_reset(hw, reset_type)) { dev_err(dev, "reset %d failed\n", reset_type); - set_bit(__ICE_RESET_FAILED, pf->state); - clear_bit(__ICE_RESET_OICR_RECV, pf->state); - clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); - clear_bit(__ICE_PFR_REQ, pf->state); - clear_bit(__ICE_CORER_REQ, pf->state); - clear_bit(__ICE_GLOBR_REQ, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); + clear_bit(ICE_RESET_OICR_RECV, pf->state); + clear_bit(ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(ICE_PFR_REQ, pf->state); + clear_bit(ICE_CORER_REQ, pf->state); + clear_bit(ICE_GLOBR_REQ, pf->state); return; } @@ -510,8 +509,8 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (reset_type == ICE_RESET_PFR) { pf->pfr_count++; ice_rebuild(pf, reset_type); - clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); - clear_bit(__ICE_PFR_REQ, pf->state); + clear_bit(ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(ICE_PFR_REQ, pf->state); ice_reset_all_vfs(pf, true); } } @@ -527,20 +526,20 @@ static void ice_reset_subtask(struct ice_pf *pf) /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an * OICR interrupt. The OICR handler (ice_misc_intr) determines what type * of reset is pending and sets bits in pf->state indicating the reset - * type and __ICE_RESET_OICR_RECV. So, if the latter bit is set + * type and ICE_RESET_OICR_RECV. So, if the latter bit is set * prepare for pending reset if not already (for PF software-initiated * global resets the software should already be prepared for it as - * indicated by __ICE_PREPARED_FOR_RESET; for global resets initiated + * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated * by firmware or software on other PFs, that bit is not set so prepare * for the reset now), poll for reset done, rebuild and return. */ - if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) { + if (test_bit(ICE_RESET_OICR_RECV, pf->state)) { /* Perform the largest reset requested */ - if (test_and_clear_bit(__ICE_CORER_RECV, pf->state)) + if (test_and_clear_bit(ICE_CORER_RECV, pf->state)) reset_type = ICE_RESET_CORER; - if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state)) + if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state)) reset_type = ICE_RESET_GLOBR; - if (test_and_clear_bit(__ICE_EMPR_RECV, pf->state)) + if (test_and_clear_bit(ICE_EMPR_RECV, pf->state)) reset_type = ICE_RESET_EMPR; /* return if no valid reset type requested */ if (reset_type == ICE_RESET_INVAL) @@ -549,7 +548,7 @@ static void ice_reset_subtask(struct ice_pf *pf) /* make sure we are ready to rebuild */ if (ice_check_reset(&pf->hw)) { - set_bit(__ICE_RESET_FAILED, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); } else { /* done with reset. start rebuild */ pf->hw.reset_ongoing = false; @@ -557,11 +556,11 @@ static void ice_reset_subtask(struct ice_pf *pf) /* clear bit to resume normal operations, but * ICE_NEEDS_RESTART bit is set in case rebuild failed */ - clear_bit(__ICE_RESET_OICR_RECV, pf->state); - clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); - clear_bit(__ICE_PFR_REQ, pf->state); - clear_bit(__ICE_CORER_REQ, pf->state); - clear_bit(__ICE_GLOBR_REQ, pf->state); + clear_bit(ICE_RESET_OICR_RECV, pf->state); + clear_bit(ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(ICE_PFR_REQ, pf->state); + clear_bit(ICE_CORER_REQ, pf->state); + clear_bit(ICE_GLOBR_REQ, pf->state); ice_reset_all_vfs(pf, true); } @@ -569,19 +568,19 @@ static void ice_reset_subtask(struct ice_pf *pf) } /* No pending resets to finish processing. Check for new resets */ - if (test_bit(__ICE_PFR_REQ, pf->state)) + if (test_bit(ICE_PFR_REQ, pf->state)) reset_type = ICE_RESET_PFR; - if (test_bit(__ICE_CORER_REQ, pf->state)) + if (test_bit(ICE_CORER_REQ, pf->state)) reset_type = ICE_RESET_CORER; - if (test_bit(__ICE_GLOBR_REQ, pf->state)) + if (test_bit(ICE_GLOBR_REQ, pf->state)) reset_type = ICE_RESET_GLOBR; /* If no valid reset type requested just return */ if (reset_type == ICE_RESET_INVAL) return; /* reset if not already down or busy */ - if (!test_bit(__ICE_DOWN, pf->state) && - !test_bit(__ICE_CFG_BUSY, pf->state)) { + if (!test_bit(ICE_DOWN, pf->state) && + !test_bit(ICE_CFG_BUSY, pf->state)) { ice_do_reset(pf, reset_type); } } @@ -720,7 +719,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) } status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_SW_CFG, caps, NULL); + ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL); if (status) netdev_info(vsi->netdev, "Get phy capability failed.\n"); @@ -873,10 +872,10 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, { struct device *dev = ice_pf_to_dev(pf); struct ice_phy_info *phy_info; + enum ice_status status; struct ice_vsi *vsi; u16 old_link_speed; bool old_link; - int result; phy_info = &pi->phy; phy_info->link_info_old = phy_info->link_info; @@ -887,10 +886,11 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, /* update the link info structures and re-enable link events, * don't bail on failure due to other book keeping needed */ - result = ice_update_link_info(pi); - if (result) - dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n", - pi->lport); + status = ice_update_link_info(pi); + if (status) + dev_dbg(dev, "Failed to update link status on port %d, err %s aq_err %s\n", + pi->lport, ice_stat_str(status), + ice_aq_str(pi->hw->adminq.sq_last_status)); /* Check if the link state is up after updating link info, and treat * this event as an UP event since the link is actually UP now. @@ -906,18 +906,12 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) && !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) { set_bit(ICE_FLAG_NO_MEDIA, pf->flags); - - result = ice_aq_set_link_restart_an(pi, false, NULL); - if (result) { - dev_dbg(dev, "Failed to set link down, VSI %d error %d\n", - vsi->vsi_num, result); - return result; - } + ice_set_link(vsi, false); } /* if the old link up/down and speed is the same as the new */ if (link_up == old_link && link_speed == old_link_speed) - return result; + return 0; if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@ -931,7 +925,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, ice_vc_notify_link_state(pf); - return result; + return 0; } /** @@ -943,8 +937,8 @@ static void ice_watchdog_subtask(struct ice_pf *pf) int i; /* if interface is down do nothing */ - if (test_bit(__ICE_DOWN, pf->state) || - test_bit(__ICE_CFG_BUSY, pf->state)) + if (test_bit(ICE_DOWN, pf->state) || + test_bit(ICE_CFG_BUSY, pf->state)) return; /* make sure we don't do these things too often */ @@ -1188,7 +1182,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) u32 oldval, val; /* Do not clean control queue if/when PF reset fails */ - if (test_bit(__ICE_RESET_FAILED, pf->state)) + if (test_bit(ICE_RESET_FAILED, pf->state)) return 0; switch (q_type) { @@ -1323,13 +1317,13 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) return; if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN)) return; - clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); + clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); /* There might be a situation where new messages arrive to a control * queue between processing the last message and clearing the @@ -1350,13 +1344,13 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - if (!test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state)) + if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state)) return; if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX)) return; - clear_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); + clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); if (ice_ctrlq_pending(hw, &hw->mailboxq)) __ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX); @@ -1372,9 +1366,9 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf) */ void ice_service_task_schedule(struct ice_pf *pf) { - if (!test_bit(__ICE_SERVICE_DIS, pf->state) && - !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state) && - !test_bit(__ICE_NEEDS_RESTART, pf->state)) + if (!test_bit(ICE_SERVICE_DIS, pf->state) && + !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) && + !test_bit(ICE_NEEDS_RESTART, pf->state)) queue_work(ice_wq, &pf->serv_task); } @@ -1384,32 +1378,32 @@ void ice_service_task_schedule(struct ice_pf *pf) */ static void ice_service_task_complete(struct ice_pf *pf) { - WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state)); + WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state)); /* force memory (pf->state) to sync before next service task */ smp_mb__before_atomic(); - clear_bit(__ICE_SERVICE_SCHED, pf->state); + clear_bit(ICE_SERVICE_SCHED, pf->state); } /** * ice_service_task_stop - stop service task and cancel works * @pf: board private structure * - * Return 0 if the __ICE_SERVICE_DIS bit was not already set, + * Return 0 if the ICE_SERVICE_DIS bit was not already set, * 1 otherwise. */ static int ice_service_task_stop(struct ice_pf *pf) { int ret; - ret = test_and_set_bit(__ICE_SERVICE_DIS, pf->state); + ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state); if (pf->serv_tmr.function) del_timer_sync(&pf->serv_tmr); if (pf->serv_task.func) cancel_work_sync(&pf->serv_task); - clear_bit(__ICE_SERVICE_SCHED, pf->state); + clear_bit(ICE_SERVICE_SCHED, pf->state); return ret; } @@ -1421,7 +1415,7 @@ static int ice_service_task_stop(struct ice_pf *pf) */ static void ice_service_task_restart(struct ice_pf *pf) { - clear_bit(__ICE_SERVICE_DIS, pf->state); + clear_bit(ICE_SERVICE_DIS, pf->state); ice_service_task_schedule(pf); } @@ -1454,7 +1448,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) unsigned int i; u32 reg; - if (!test_and_clear_bit(__ICE_MDD_EVENT_PENDING, pf->state)) { + if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) { /* Since the VF MDD event logging is rate limited, check if * there are pending MDD events. */ @@ -1546,7 +1540,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_PQM_VALID_M) { wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); vf->mdd_tx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n", i); @@ -1556,7 +1550,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TCLAN_VALID_M) { wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); vf->mdd_tx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n", i); @@ -1566,7 +1560,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TDPU_VALID_M) { wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); vf->mdd_tx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n", i); @@ -1576,7 +1570,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_RX_VALID_M) { wr32(hw, VP_MDET_RX(i), 0xFFFF); vf->mdd_rx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n", i); @@ -1631,7 +1625,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) if (!pcaps) return -ENOMEM; - retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (retcode) { dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n", @@ -1691,7 +1685,7 @@ static int ice_init_nvm_phy_type(struct ice_port_info *pi) if (!pcaps) return -ENOMEM; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_NVM_CAP, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA, pcaps, NULL); if (status) { @@ -1737,15 +1731,18 @@ static void ice_init_link_dflt_override(struct ice_port_info *pi) * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings * @pi: port info structure * - * If default override is enabled, initialized the user PHY cfg speed and FEC + * If default override is enabled, initialize the user PHY cfg speed and FEC * settings using the default override mask from the NVM. * * The PHY should only be configured with the default override settings the - * first time media is available. The __ICE_LINK_DEFAULT_OVERRIDE_PENDING state + * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state * is used to indicate that the user PHY cfg default override is initialized * and the PHY has not been configured with the default override settings. The * state is set here, and cleared in ice_configure_phy the first time the PHY is * configured. + * + * This function should be called only if the FW doesn't support default + * configuration mode, as reported by ice_fw_supports_report_dflt_cfg. */ static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi) { @@ -1770,7 +1767,7 @@ static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi) cfg->link_fec_opt = ldo->fec_options; phy->curr_user_fec_req = ICE_FEC_AUTO; - set_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state); + set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state); } /** @@ -1793,22 +1790,21 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi) struct ice_phy_info *phy = &pi->phy; struct ice_pf *pf = pi->hw->back; enum ice_status status; - struct ice_vsi *vsi; int err = 0; if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) return -EIO; - vsi = ice_get_main_vsi(pf); - if (!vsi) - return -EINVAL; - pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return -ENOMEM; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, - NULL); + if (ice_fw_supports_report_dflt_cfg(pi->hw)) + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, + pcaps, NULL); + else + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + pcaps, NULL); if (status) { dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n"); err = -EIO; @@ -1818,22 +1814,24 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi) ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg); /* check if lenient mode is supported and enabled */ - if (ice_fw_supports_link_override(&vsi->back->hw) && + if (ice_fw_supports_link_override(pi->hw) && !(pcaps->module_compliance_enforcement & ICE_AQC_MOD_ENFORCE_STRICT_MODE)) { set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags); - /* if link default override is enabled, initialize user PHY - * configuration with link default override values + /* if the FW supports default PHY configuration mode, then the driver + * does not have to apply link override settings. If not, + * initialize user PHY configuration with link override values */ - if (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN) { + if (!ice_fw_supports_report_dflt_cfg(pi->hw) && + (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) { ice_init_phy_cfg_dflt_override(pi); goto out; } } - /* if link default override is not enabled, initialize PHY using - * topology with media + /* if link default override is not enabled, set user flow control and + * FEC settings based on what get_phy_caps returned */ phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps, pcaps->link_fec_options); @@ -1841,7 +1839,7 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi) out: phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M; - set_bit(__ICE_PHY_INIT_COMPLETE, pf->state); + set_bit(ICE_PHY_INIT_COMPLETE, pf->state); err_out: kfree(pcaps); return err; @@ -1858,27 +1856,24 @@ err_out: static int ice_configure_phy(struct ice_vsi *vsi) { struct device *dev = ice_pf_to_dev(vsi->back); + struct ice_port_info *pi = vsi->port_info; struct ice_aqc_get_phy_caps_data *pcaps; struct ice_aqc_set_phy_cfg_data *cfg; - struct ice_port_info *pi; + struct ice_phy_info *phy = &pi->phy; + struct ice_pf *pf = vsi->back; enum ice_status status; int err = 0; - pi = vsi->port_info; - if (!pi) - return -EINVAL; - /* Ensure we have media as we cannot configure a medialess port */ - if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) + if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) return -EPERM; ice_print_topo_conflict(vsi); - if (vsi->port_info->phy.link_info.topo_media_conflict == - ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) + if (phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) return -EPERM; - if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) + if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) return ice_force_phys_link_state(vsi, true); pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); @@ -1886,7 +1881,7 @@ static int ice_configure_phy(struct ice_vsi *vsi) return -ENOMEM; /* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) { dev_err(dev, "Failed to get PHY configuration, VSI %d error %s\n", @@ -1899,15 +1894,19 @@ static int ice_configure_phy(struct ice_vsi *vsi) * there's nothing to do */ if (pcaps->caps & ICE_AQC_PHY_EN_LINK && - ice_phy_caps_equals_cfg(pcaps, &pi->phy.curr_user_phy_cfg)) + ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg)) goto done; /* Use PHY topology as baseline for configuration */ memset(pcaps, 0, sizeof(*pcaps)); - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, - NULL); + if (ice_fw_supports_report_dflt_cfg(pi->hw)) + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, + pcaps, NULL); + else + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + pcaps, NULL); if (status) { - dev_err(dev, "Failed to get PHY topology, VSI %d error %s\n", + dev_err(dev, "Failed to get PHY caps, VSI %d error %s\n", vsi->vsi_num, ice_stat_str(status)); err = -EIO; goto done; @@ -1924,10 +1923,10 @@ static int ice_configure_phy(struct ice_vsi *vsi) /* Speed - If default override pending, use curr_user_phy_cfg set in * ice_init_phy_user_cfg_ldo. */ - if (test_and_clear_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, + if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, vsi->back->state)) { - cfg->phy_type_low = pi->phy.curr_user_phy_cfg.phy_type_low; - cfg->phy_type_high = pi->phy.curr_user_phy_cfg.phy_type_high; + cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low; + cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high; } else { u64 phy_low = 0, phy_high = 0; @@ -1945,7 +1944,7 @@ static int ice_configure_phy(struct ice_vsi *vsi) } /* FEC */ - ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req); + ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req); /* Can't provide what was requested; use PHY capabilities */ if (cfg->link_fec_opt != @@ -1957,12 +1956,12 @@ static int ice_configure_phy(struct ice_vsi *vsi) /* Flow Control - always supported; no need to check against * capabilities */ - ice_cfg_phy_fc(pi, cfg, pi->phy.curr_user_fc_req); + ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req); /* Enable link and link update */ cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK; - status = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL); + status = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL); if (status) { dev_err(dev, "Failed to set phy config, VSI %d error %s\n", vsi->vsi_num, ice_stat_str(status)); @@ -2003,7 +2002,7 @@ static void ice_check_media_subtask(struct ice_pf *pf) return; if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { - if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state)) + if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) ice_init_phy_user_cfg(pi); /* PHY settings are reset on media insertion, reconfigure @@ -2039,8 +2038,8 @@ static void ice_service_task(struct work_struct *work) /* bail if a reset/recovery cycle is pending or rebuild failed */ if (ice_is_reset_in_progress(pf->state) || - test_bit(__ICE_SUSPENDED, pf->state) || - test_bit(__ICE_NEEDS_RESTART, pf->state)) { + test_bit(ICE_SUSPENDED, pf->state) || + test_bit(ICE_NEEDS_RESTART, pf->state)) { ice_service_task_complete(pf); return; } @@ -2061,7 +2060,8 @@ static void ice_service_task(struct work_struct *work) ice_clean_mailboxq_subtask(pf); ice_sync_arfs_fltrs(pf); ice_flush_fdir_ctx(pf); - /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ + + /* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */ ice_service_task_complete(pf); /* If the tasks have taken longer than one service timer period @@ -2069,11 +2069,11 @@ static void ice_service_task(struct work_struct *work) * schedule the service task now. */ if (time_after(jiffies, (start_time + pf->serv_tmr_period)) || - test_bit(__ICE_MDD_EVENT_PENDING, pf->state) || - test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || - test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) || - test_bit(__ICE_FD_VF_FLUSH_CTX, pf->state) || - test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + test_bit(ICE_MDD_EVENT_PENDING, pf->state) || + test_bit(ICE_VFLR_EVENT_PENDING, pf->state) || + test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) || + test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) || + test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) mod_timer(&pf->serv_tmr, jiffies); } @@ -2103,7 +2103,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) struct device *dev = ice_pf_to_dev(pf); /* bail out if earlier reset has failed */ - if (test_bit(__ICE_RESET_FAILED, pf->state)) { + if (test_bit(ICE_RESET_FAILED, pf->state)) { dev_dbg(dev, "earlier reset has failed\n"); return -EIO; } @@ -2115,13 +2115,13 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) switch (reset) { case ICE_RESET_PFR: - set_bit(__ICE_PFR_REQ, pf->state); + set_bit(ICE_PFR_REQ, pf->state); break; case ICE_RESET_CORER: - set_bit(__ICE_CORER_REQ, pf->state); + set_bit(ICE_CORER_REQ, pf->state); break; case ICE_RESET_GLOBR: - set_bit(__ICE_GLOBR_REQ, pf->state); + set_bit(ICE_GLOBR_REQ, pf->state); break; default: return -EINVAL; @@ -2626,8 +2626,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) u32 oicr, ena_mask; dev = ice_pf_to_dev(pf); - set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); - set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); + set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); + set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); @@ -2639,18 +2639,18 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_MAL_DETECT_M) { ena_mask &= ~PFINT_OICR_MAL_DETECT_M; - set_bit(__ICE_MDD_EVENT_PENDING, pf->state); + set_bit(ICE_MDD_EVENT_PENDING, pf->state); } if (oicr & PFINT_OICR_VFLR_M) { /* disable any further VFLR event notifications */ - if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) { + if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { u32 reg = rd32(hw, PFINT_OICR_ENA); reg &= ~PFINT_OICR_VFLR_M; wr32(hw, PFINT_OICR_ENA, reg); } else { ena_mask &= ~PFINT_OICR_VFLR_M; - set_bit(__ICE_VFLR_EVENT_PENDING, pf->state); + set_bit(ICE_VFLR_EVENT_PENDING, pf->state); } } @@ -2676,13 +2676,13 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) * We also make note of which reset happened so that peer * devices/drivers can be informed. */ - if (!test_and_set_bit(__ICE_RESET_OICR_RECV, pf->state)) { + if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) { if (reset == ICE_RESET_CORER) - set_bit(__ICE_CORER_RECV, pf->state); + set_bit(ICE_CORER_RECV, pf->state); else if (reset == ICE_RESET_GLOBR) - set_bit(__ICE_GLOBR_RECV, pf->state); + set_bit(ICE_GLOBR_RECV, pf->state); else - set_bit(__ICE_EMPR_RECV, pf->state); + set_bit(ICE_EMPR_RECV, pf->state); /* There are couple of different bits at play here. * hw->reset_ongoing indicates whether the hardware is @@ -2690,7 +2690,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) * is received and set back to false after the driver * has determined that the hardware is out of reset. * - * __ICE_RESET_OICR_RECV in pf->state indicates + * ICE_RESET_OICR_RECV in pf->state indicates * that a post reset rebuild is required before the * driver is operational again. This is set above. * @@ -2718,7 +2718,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_ECC_ERR_M)) { - set_bit(__ICE_PFR_REQ, pf->state); + set_bit(ICE_PFR_REQ, pf->state); ice_service_task_schedule(pf); } } @@ -2977,6 +2977,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) if (!netdev) return -ENOMEM; + set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); vsi->netdev = netdev; np = netdev_priv(netdev); np->vsi = vsi; @@ -3078,15 +3079,6 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, struct ice_vsi *vsi = np->vsi; int ret; - if (vid >= VLAN_N_VID) { - netdev_err(netdev, "VLAN id requested %d is out of range %d\n", - vid, VLAN_N_VID); - return -EINVAL; - } - - if (vsi->info.pvid) - return -EINVAL; - /* VLAN 0 is added by default during load/reset */ if (!vid) return 0; @@ -3124,9 +3116,6 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, struct ice_vsi *vsi = np->vsi; int ret; - if (vsi->info.pvid) - return -EINVAL; - /* don't allow removal of VLAN 0 */ if (!vid) return 0; @@ -3201,6 +3190,7 @@ unroll_napi_add: if (vsi) { ice_napi_del(vsi); if (vsi->netdev) { + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); free_netdev(vsi->netdev); vsi->netdev = NULL; } @@ -3334,7 +3324,7 @@ static int ice_init_pf(struct ice_pf *pf) timer_setup(&pf->serv_tmr, ice_service_timer, 0); pf->serv_tmr_period = HZ; INIT_WORK(&pf->serv_task, ice_service_task); - clear_bit(__ICE_SERVICE_SCHED, pf->state); + clear_bit(ICE_SERVICE_SCHED, pf->state); mutex_init(&pf->avail_q_mutex); pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); @@ -3506,15 +3496,14 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) } /** - * ice_is_wol_supported - get NVM state of WoL - * @pf: board private structure + * ice_is_wol_supported - check if WoL is supported + * @hw: pointer to hardware info * * Check if WoL is supported based on the HW configuration. * Returns true if NVM supports and enables WoL for this port, false otherwise */ -bool ice_is_wol_supported(struct ice_pf *pf) +bool ice_is_wol_supported(struct ice_hw *hw) { - struct ice_hw *hw = &pf->hw; u16 wol_ctrl; /* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control @@ -3523,7 +3512,7 @@ bool ice_is_wol_supported(struct ice_pf *pf) if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl)) return false; - return !(BIT(hw->pf_id) & wol_ctrl); + return !(BIT(hw->port_info->lport) & wol_ctrl); } /** @@ -3544,7 +3533,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) if (!new_rx && !new_tx) return -EINVAL; - while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) return -EBUSY; @@ -3568,7 +3557,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) ice_pf_dcb_recfg(pf); ice_vsi_open(vsi); done: - clear_bit(__ICE_CFG_BUSY, pf->state); + clear_bit(ICE_CFG_BUSY, pf->state); return err; } @@ -3971,6 +3960,7 @@ static int ice_register_netdev(struct ice_pf *pf) if (err) goto err_register_netdev; + set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); err = ice_devlink_create_port(vsi); @@ -3982,9 +3972,11 @@ static int ice_register_netdev(struct ice_pf *pf) return 0; err_devlink_create: unregister_netdev(vsi->netdev); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); err_register_netdev: free_netdev(vsi->netdev); vsi->netdev = NULL; + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); return err; } @@ -4010,7 +4002,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) if (err) return err; - err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); + err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev)); if (err) { dev_err(dev, "BAR0 I/O map error %d\n", err); return err; @@ -4034,9 +4026,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pf->pdev = pdev; pci_set_drvdata(pdev, pf); - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_DOWN, pf->state); /* Disable service task until DOWN bit is cleared */ - set_bit(__ICE_SERVICE_DIS, pf->state); + set_bit(ICE_SERVICE_DIS, pf->state); hw = &pf->hw; hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0]; @@ -4176,7 +4168,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_alloc_sw_unroll; } - clear_bit(__ICE_SERVICE_DIS, pf->state); + clear_bit(ICE_SERVICE_DIS, pf->state); /* tell the firmware we are up */ err = ice_send_version(pf); @@ -4195,28 +4187,25 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_send_version_unroll; } + /* not a fatal error if this fails */ err = ice_init_nvm_phy_type(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err); - goto err_send_version_unroll; - } + /* not a fatal error if this fails */ err = ice_update_link_info(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_update_link_info failed: %d\n", err); - goto err_send_version_unroll; - } ice_init_link_dflt_override(pf->hw.port_info); /* if media available, initialize PHY settings */ if (pf->hw.port_info->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { + /* not a fatal error if this fails */ err = ice_init_phy_user_cfg(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err); - goto err_send_version_unroll; - } if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) { struct ice_vsi *vsi = ice_get_main_vsi(pf); @@ -4273,16 +4262,15 @@ probe_done: goto err_netdev_reg; /* ready to go, so clear down state bit */ - clear_bit(__ICE_DOWN, pf->state); - + clear_bit(ICE_DOWN, pf->state); return 0; err_netdev_reg: err_send_version_unroll: ice_vsi_release_all(pf); err_alloc_sw_unroll: - set_bit(__ICE_SERVICE_DIS, pf->state); - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_SERVICE_DIS, pf->state); + set_bit(ICE_DOWN, pf->state); devm_kfree(dev, pf->first_sw); err_msix_misc_unroll: ice_free_irq_msix_misc(pf); @@ -4382,11 +4370,11 @@ static void ice_remove(struct pci_dev *pdev) } if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) { - set_bit(__ICE_VF_RESETS_DISABLED, pf->state); + set_bit(ICE_VF_RESETS_DISABLED, pf->state); ice_free_vfs(pf); } - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_DOWN, pf->state); ice_service_task_stop(pf); ice_aq_cancel_waiting_tasks(pf); @@ -4546,13 +4534,13 @@ static int __maybe_unused ice_suspend(struct device *dev) disabled = ice_service_task_stop(pf); /* Already suspended?, then there is nothing to do */ - if (test_and_set_bit(__ICE_SUSPENDED, pf->state)) { + if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { if (!disabled) ice_service_task_restart(pf); return 0; } - if (test_bit(__ICE_DOWN, pf->state) || + if (test_bit(ICE_DOWN, pf->state) || ice_is_reset_in_progress(pf->state)) { dev_err(dev, "can't suspend device in reset or already down\n"); if (!disabled) @@ -4577,6 +4565,7 @@ static int __maybe_unused ice_suspend(struct device *dev) continue; ice_vsi_free_q_vectors(pf->vsi[v]); } + ice_free_cpu_rx_rmap(ice_get_main_vsi(pf)); ice_clear_interrupt_scheme(pf); pci_save_state(pdev); @@ -4623,16 +4612,16 @@ static int __maybe_unused ice_resume(struct device *dev) if (ret) dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret); - clear_bit(__ICE_DOWN, pf->state); + clear_bit(ICE_DOWN, pf->state); /* Now perform PF reset and rebuild */ reset_type = ICE_RESET_PFR; /* re-enable service task for reset, but allow reset to schedule it */ - clear_bit(__ICE_SERVICE_DIS, pf->state); + clear_bit(ICE_SERVICE_DIS, pf->state); if (ice_schedule_reset(pf, reset_type)) dev_err(dev, "Reset during resume failed.\n"); - clear_bit(__ICE_SUSPENDED, pf->state); + clear_bit(ICE_SUSPENDED, pf->state); ice_service_task_restart(pf); /* Restart the service task */ @@ -4661,11 +4650,11 @@ ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err) return PCI_ERS_RESULT_DISCONNECT; } - if (!test_bit(__ICE_SUSPENDED, pf->state)) { + if (!test_bit(ICE_SUSPENDED, pf->state)) { ice_service_task_stop(pf); - if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) { - set_bit(__ICE_PFR_REQ, pf->state); + if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { + set_bit(ICE_PFR_REQ, pf->state); ice_prepare_for_reset(pf); } } @@ -4732,7 +4721,7 @@ static void ice_pci_err_resume(struct pci_dev *pdev) return; } - if (test_bit(__ICE_SUSPENDED, pf->state)) { + if (test_bit(ICE_SUSPENDED, pf->state)) { dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n", __func__); return; @@ -4753,11 +4742,11 @@ static void ice_pci_err_reset_prepare(struct pci_dev *pdev) { struct ice_pf *pf = pci_get_drvdata(pdev); - if (!test_bit(__ICE_SUSPENDED, pf->state)) { + if (!test_bit(ICE_SUSPENDED, pf->state)) { ice_service_task_stop(pf); - if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) { - set_bit(__ICE_PFR_REQ, pf->state); + if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { + set_bit(ICE_PFR_REQ, pf->state); ice_prepare_for_reset(pf); } } @@ -4904,7 +4893,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return 0; } - if (test_bit(__ICE_DOWN, pf->state) || + if (test_bit(ICE_DOWN, pf->state) || ice_is_reset_in_progress(pf->state)) { netdev_err(netdev, "can't set mac %pM. device not ready\n", mac); @@ -5123,10 +5112,10 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) * separate if/else statements to guarantee each feature is checked */ if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH)) - ret = ice_vsi_manage_rss_lut(vsi, true); + ice_vsi_manage_rss_lut(vsi, true); else if (!(features & NETIF_F_RXHASH) && netdev->features & NETIF_F_RXHASH) - ret = ice_vsi_manage_rss_lut(vsi, false); + ice_vsi_manage_rss_lut(vsi, false); if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) @@ -5207,6 +5196,105 @@ int ice_vsi_cfg(struct ice_vsi *vsi) return err; } +/* THEORY OF MODERATION: + * The below code creates custom DIM profiles for use by this driver, because + * the ice driver hardware works differently than the hardware that DIMLIB was + * originally made for. ice hardware doesn't have packet count limits that + * can trigger an interrupt, but it *does* have interrupt rate limit support, + * and this code adds that capability to be used by the driver when it's using + * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver + * for how to "respond" to traffic and interrupts, so this driver uses a + * slightly different set of moderation parameters to get best performance. + */ +struct ice_dim { + /* the throttle rate for interrupts, basically worst case delay before + * an initial interrupt fires, value is stored in microseconds. + */ + u16 itr; + /* the rate limit for interrupts, which can cap a delay from a small + * ITR at a certain amount of interrupts per second. f.e. a 2us ITR + * could yield as much as 500,000 interrupts per second, but with a + * 10us rate limit, it limits to 100,000 interrupts per second. Value + * is stored in microseconds. + */ + u16 intrl; +}; + +/* Make a different profile for Rx that doesn't allow quite so aggressive + * moderation at the high end (it maxes out at 128us or about 8k interrupts a + * second. The INTRL/rate parameters here are only useful to cap small ITR + * values, which is why for larger ITR's - like 128, which can only generate + * 8k interrupts per second, there is no point to rate limit and the values + * are set to zero. The rate limit values do affect latency, and so must + * be reasonably small so to not impact latency sensitive tests. + */ +static const struct ice_dim rx_profile[] = { + {2, 10}, + {8, 16}, + {32, 0}, + {96, 0}, + {128, 0} +}; + +/* The transmit profile, which has the same sorts of values + * as the previous struct + */ +static const struct ice_dim tx_profile[] = { + {2, 10}, + {8, 16}, + {64, 0}, + {128, 0}, + {256, 0} +}; + +static void ice_tx_dim_work(struct work_struct *work) +{ + struct ice_ring_container *rc; + struct ice_q_vector *q_vector; + struct dim *dim; + u16 itr, intrl; + + dim = container_of(work, struct dim, work); + rc = container_of(dim, struct ice_ring_container, dim); + q_vector = container_of(rc, struct ice_q_vector, tx); + + if (dim->profile_ix >= ARRAY_SIZE(tx_profile)) + dim->profile_ix = ARRAY_SIZE(tx_profile) - 1; + + /* look up the values in our local table */ + itr = tx_profile[dim->profile_ix].itr; + intrl = tx_profile[dim->profile_ix].intrl; + + ice_write_itr(rc, itr); + ice_write_intrl(q_vector, intrl); + + dim->state = DIM_START_MEASURE; +} + +static void ice_rx_dim_work(struct work_struct *work) +{ + struct ice_ring_container *rc; + struct ice_q_vector *q_vector; + struct dim *dim; + u16 itr, intrl; + + dim = container_of(work, struct dim, work); + rc = container_of(dim, struct ice_ring_container, dim); + q_vector = container_of(rc, struct ice_q_vector, rx); + + if (dim->profile_ix >= ARRAY_SIZE(rx_profile)) + dim->profile_ix = ARRAY_SIZE(rx_profile) - 1; + + /* look up the values in our local table */ + itr = rx_profile[dim->profile_ix].itr; + intrl = rx_profile[dim->profile_ix].intrl; + + ice_write_itr(rc, itr); + ice_write_intrl(q_vector, intrl); + + dim->state = DIM_START_MEASURE; +} + /** * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured @@ -5221,6 +5309,12 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; + INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work); + q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + + INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work); + q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + if (q_vector->rx.ring || q_vector->tx.ring) napi_enable(&q_vector->napi); } @@ -5354,7 +5448,6 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) vsi->tx_linearize = 0; vsi->rx_buf_failed = 0; vsi->rx_page_failed = 0; - vsi->rx_gro_dropped = 0; rcu_read_lock(); @@ -5369,7 +5462,6 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) vsi_stats->rx_bytes += bytes; vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; - vsi->rx_gro_dropped += ring->rx_stats.gro_dropped; } /* update XDP Tx rings counters */ @@ -5391,7 +5483,7 @@ void ice_update_vsi_stats(struct ice_vsi *vsi) struct ice_pf *pf = vsi->back; if (test_bit(ICE_VSI_DOWN, vsi->state) || - test_bit(__ICE_CFG_BUSY, pf->state)) + test_bit(ICE_CFG_BUSY, pf->state)) return; /* get stats as recorded by Tx/Rx rings */ @@ -5401,7 +5493,7 @@ void ice_update_vsi_stats(struct ice_vsi *vsi) ice_update_eth_stats(vsi); cur_ns->tx_errors = cur_es->tx_errors; - cur_ns->rx_dropped = cur_es->rx_discards + vsi->rx_gro_dropped; + cur_ns->rx_dropped = cur_es->rx_discards; cur_ns->tx_dropped = cur_es->tx_discards; cur_ns->multicast = cur_es->rx_multicast; @@ -5631,6 +5723,9 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) if (q_vector->rx.ring || q_vector->tx.ring) napi_disable(&q_vector->napi); + + cancel_work_sync(&q_vector->tx.dim.work); + cancel_work_sync(&q_vector->rx.dim.work); } } @@ -5643,7 +5738,7 @@ int ice_down(struct ice_vsi *vsi) int i, tx_err, rx_err, link_err = 0; /* Caller of this function is expected to set the - * vsi->state __ICE_DOWN bit + * vsi->state ICE_DOWN bit */ if (vsi->netdev) { netif_carrier_off(vsi->netdev); @@ -5991,7 +6086,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) enum ice_status ret; int err; - if (test_bit(__ICE_DOWN, pf->state)) + if (test_bit(ICE_DOWN, pf->state)) goto clear_recovery; dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type); @@ -6107,7 +6202,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_replay_post(hw); /* if we get here, reset flow is successful */ - clear_bit(__ICE_RESET_FAILED, pf->state); + clear_bit(ICE_RESET_FAILED, pf->state); return; err_vsi_rebuild: @@ -6115,10 +6210,10 @@ err_sched_init_port: ice_sched_cleanup_all(hw); err_init_ctrlq: ice_shutdown_all_ctrlq(hw); - set_bit(__ICE_RESET_FAILED, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); clear_recovery: /* set this bit in PF state to control service task scheduling */ - set_bit(__ICE_NEEDS_RESTART, pf->state); + set_bit(ICE_NEEDS_RESTART, pf->state); dev_err(dev, "Rebuild failed, unload and reload driver\n"); } @@ -6640,19 +6735,19 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) switch (pf->tx_timeout_recovery_level) { case 1: - set_bit(__ICE_PFR_REQ, pf->state); + set_bit(ICE_PFR_REQ, pf->state); break; case 2: - set_bit(__ICE_CORER_REQ, pf->state); + set_bit(ICE_CORER_REQ, pf->state); break; case 3: - set_bit(__ICE_GLOBR_REQ, pf->state); + set_bit(ICE_GLOBR_REQ, pf->state); break; default: netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n"); - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_DOWN, pf->state); set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); - set_bit(__ICE_SERVICE_DIS, pf->state); + set_bit(ICE_SERVICE_DIS, pf->state); break; } @@ -6675,12 +6770,35 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) int ice_open(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + + if (ice_is_reset_in_progress(pf->state)) { + netdev_err(netdev, "can't open net device while reset is in progress"); + return -EBUSY; + } + + return ice_open_internal(netdev); +} + +/** + * ice_open_internal - Called when a network interface becomes active + * @netdev: network interface device structure + * + * Internal ice_open implementation. Should not be used directly except for ice_open and reset + * handling routine + * + * Returns 0 on success, negative value on failure + */ +int ice_open_internal(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_port_info *pi; + enum ice_status status; int err; - if (test_bit(__ICE_NEEDS_RESTART, pf->state)) { + if (test_bit(ICE_NEEDS_RESTART, pf->state)) { netdev_err(netdev, "driver needs to be unloaded and reloaded\n"); return -EIO; } @@ -6688,17 +6806,17 @@ int ice_open(struct net_device *netdev) netif_carrier_off(netdev); pi = vsi->port_info; - err = ice_update_link_info(pi); - if (err) { - netdev_err(netdev, "Failed to get link info, error %d\n", - err); - return err; + status = ice_update_link_info(pi); + if (status) { + netdev_err(netdev, "Failed to get link info, error %s\n", + ice_stat_str(status)); + return -EIO; } /* Set PHY if there is media, otherwise, turn off PHY */ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); - if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state)) { + if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) { err = ice_init_phy_user_cfg(pi); if (err) { netdev_err(netdev, "Failed to initialize PHY settings, error %d\n", @@ -6715,12 +6833,7 @@ int ice_open(struct net_device *netdev) } } else { set_bit(ICE_FLAG_NO_MEDIA, pf->flags); - err = ice_aq_set_link_restart_an(pi, false, NULL); - if (err) { - netdev_err(netdev, "Failed to set PHY state, VSI %d error %d\n", - vsi->vsi_num, err); - return err; - } + ice_set_link(vsi, false); } err = ice_vsi_open(vsi); @@ -6748,6 +6861,12 @@ int ice_stop(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + + if (ice_is_reset_in_progress(pf->state)) { + netdev_err(netdev, "can't stop net device while reset is in progress"); + return -EBUSY; + } ice_vsi_close(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 75ccbfc07f99..fee37a5844cf 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -644,6 +644,7 @@ ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank, /* Verify that the simple checksum is zero */ for (i = 0; i < sizeof(tmp); i++) + /* cppcheck-suppress objectIndex */ sum += ((u8 *)&tmp)[i]; if (sum) { diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index f890337cc24a..2f097637e405 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -919,7 +919,7 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, } /** - * ice_sched_add_nodes_to_layer - Add nodes to a given layer + * ice_sched_add_nodes_to_hw_layer - Add nodes to HW layer * @pi: port information structure * @tc_node: pointer to TC node * @parent: pointer to parent node @@ -928,82 +928,107 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, * @first_node_teid: pointer to the first node TEID * @num_nodes_added: pointer to number of nodes added * - * This function add nodes to a given layer. + * Add nodes into specific HW layer. */ static enum ice_status -ice_sched_add_nodes_to_layer(struct ice_port_info *pi, - struct ice_sched_node *tc_node, - struct ice_sched_node *parent, u8 layer, - u16 num_nodes, u32 *first_node_teid, - u16 *num_nodes_added) +ice_sched_add_nodes_to_hw_layer(struct ice_port_info *pi, + struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 layer, + u16 num_nodes, u32 *first_node_teid, + u16 *num_nodes_added) { - u32 *first_teid_ptr = first_node_teid; - u16 new_num_nodes, max_child_nodes; - enum ice_status status = 0; - struct ice_hw *hw = pi->hw; - u16 num_added = 0; - u32 temp; + u16 max_child_nodes; *num_nodes_added = 0; if (!num_nodes) - return status; + return 0; - if (!parent || layer < hw->sw_entry_point_layer) + if (!parent || layer < pi->hw->sw_entry_point_layer) return ICE_ERR_PARAM; /* max children per node per layer */ - max_child_nodes = hw->max_children[parent->tx_sched_layer]; + max_child_nodes = pi->hw->max_children[parent->tx_sched_layer]; - /* current number of children + required nodes exceed max children ? */ + /* current number of children + required nodes exceed max children */ if ((parent->num_children + num_nodes) > max_child_nodes) { /* Fail if the parent is a TC node */ if (parent == tc_node) return ICE_ERR_CFG; + return ICE_ERR_MAX_LIMIT; + } + + return ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes, + num_nodes_added, first_node_teid); +} + +/** + * ice_sched_add_nodes_to_layer - Add nodes to a given layer + * @pi: port information structure + * @tc_node: pointer to TC node + * @parent: pointer to parent node + * @layer: layer number to add nodes + * @num_nodes: number of nodes to be added + * @first_node_teid: pointer to the first node TEID + * @num_nodes_added: pointer to number of nodes added + * + * This function add nodes to a given layer. + */ +static enum ice_status +ice_sched_add_nodes_to_layer(struct ice_port_info *pi, + struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 layer, + u16 num_nodes, u32 *first_node_teid, + u16 *num_nodes_added) +{ + u32 *first_teid_ptr = first_node_teid; + u16 new_num_nodes = num_nodes; + enum ice_status status = 0; + *num_nodes_added = 0; + while (*num_nodes_added < num_nodes) { + u16 max_child_nodes, num_added = 0; + /* cppcheck-suppress unusedVariable */ + u32 temp; + + status = ice_sched_add_nodes_to_hw_layer(pi, tc_node, parent, + layer, new_num_nodes, + first_teid_ptr, + &num_added); + if (!status) + *num_nodes_added += num_added; + /* added more nodes than requested ? */ + if (*num_nodes_added > num_nodes) { + ice_debug(pi->hw, ICE_DBG_SCHED, "added extra nodes %d %d\n", num_nodes, + *num_nodes_added); + status = ICE_ERR_CFG; + break; + } + /* break if all the nodes are added successfully */ + if (!status && (*num_nodes_added == num_nodes)) + break; + /* break if the error is not max limit */ + if (status && status != ICE_ERR_MAX_LIMIT) + break; + /* Exceeded the max children */ + max_child_nodes = pi->hw->max_children[parent->tx_sched_layer]; /* utilize all the spaces if the parent is not full */ if (parent->num_children < max_child_nodes) { new_num_nodes = max_child_nodes - parent->num_children; - /* this recursion is intentional, and wouldn't - * go more than 2 calls + } else { + /* This parent is full, try the next sibling */ + parent = parent->sibling; + /* Don't modify the first node TEID memory if the + * first node was added already in the above call. + * Instead send some temp memory for all other + * recursive calls. */ - status = ice_sched_add_nodes_to_layer(pi, tc_node, - parent, layer, - new_num_nodes, - first_node_teid, - &num_added); - if (status) - return status; + if (num_added) + first_teid_ptr = &temp; - *num_nodes_added += num_added; + new_num_nodes = num_nodes - *num_nodes_added; } - /* Don't modify the first node TEID memory if the first node was - * added already in the above call. Instead send some temp - * memory for all other recursive calls. - */ - if (num_added) - first_teid_ptr = &temp; - - new_num_nodes = num_nodes - num_added; - - /* This parent is full, try the next sibling */ - parent = parent->sibling; - - /* this recursion is intentional, for 1024 queues - * per VSI, it goes max of 16 iterations. - * 1024 / 8 = 128 layer 8 nodes - * 128 /8 = 16 (add 8 nodes per iteration) - */ - status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, - layer, new_num_nodes, - first_teid_ptr, - &num_added); - *num_nodes_added += num_added; - return status; } - - status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes, - num_nodes_added, first_node_teid); return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 5e5683a3eb23..357d3073d814 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1238,6 +1238,9 @@ ice_add_update_vsi_list(struct ice_hw *hw, ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2, vsi_list_id); + if (!m_entry->vsi_list_info) + return ICE_ERR_NO_MEMORY; + /* If this entry was large action then the large action needs * to be updated to point to FWD to VSI list */ @@ -2220,6 +2223,7 @@ ice_vsi_uses_fltr(struct ice_fltr_mgmt_list_entry *fm_entry, u16 vsi_handle) return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI && fm_entry->fltr_info.vsi_handle == vsi_handle) || (fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST && + fm_entry->vsi_list_info && (test_bit(vsi_handle, fm_entry->vsi_list_info->vsi_map)))); } @@ -2292,14 +2296,12 @@ ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle, return ICE_ERR_PARAM; list_for_each_entry(fm_entry, lkup_list_head, list_entry) { - struct ice_fltr_info *fi; - - fi = &fm_entry->fltr_info; - if (!fi || !ice_vsi_uses_fltr(fm_entry, vsi_handle)) + if (!ice_vsi_uses_fltr(fm_entry, vsi_handle)) continue; status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, - vsi_list_head, fi); + vsi_list_head, + &fm_entry->fltr_info); if (status) return status; } @@ -2622,7 +2624,7 @@ ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle, &remove_list_head); mutex_unlock(rule_lock); if (status) - return; + goto free_fltr_list; switch (lkup) { case ICE_SW_LKUP_MAC: @@ -2645,6 +2647,7 @@ ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle, break; } +free_fltr_list: list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) { list_del(&fm_entry->list_entry); devm_kfree(ice_hw_to_dev(hw), fm_entry); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index dfdf2c1fa9d3..e2b4b29ea207 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1223,216 +1223,50 @@ construct_skb: } /** - * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic - * @port_info: port_info structure containing the current link speed - * @avg_pkt_size: average size of Tx or Rx packets based on clean routine - * @itr: ITR value to update + * ice_net_dim - Update net DIM algorithm + * @q_vector: the vector associated with the interrupt * - * Calculate how big of an increment should be applied to the ITR value passed - * in based on wmem_default, SKB overhead, ethernet overhead, and the current - * link speed. + * Create a DIM sample and notify net_dim() so that it can possibly decide + * a new ITR value based on incoming packets, bytes, and interrupts. * - * The following is a calculation derived from: - * wmem_default / (size + overhead) = desired_pkts_per_int - * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate - * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value - * - * Assuming wmem_default is 212992 and overhead is 640 bytes per - * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the - * formula down to: - * - * wmem_default * bits_per_byte * usecs_per_sec pkt_size + 24 - * ITR = -------------------------------------------- * -------------- - * rate pkt_size + 640 + * This function is a no-op if the ring is not configured to dynamic ITR. */ -static unsigned int -ice_adjust_itr_by_size_and_speed(struct ice_port_info *port_info, - unsigned int avg_pkt_size, - unsigned int itr) +static void ice_net_dim(struct ice_q_vector *q_vector) { - switch (port_info->phy.link_info.link_speed) { - case ICE_AQ_LINK_SPEED_100GB: - itr += DIV_ROUND_UP(17 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_50GB: - itr += DIV_ROUND_UP(34 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_40GB: - itr += DIV_ROUND_UP(43 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_25GB: - itr += DIV_ROUND_UP(68 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_20GB: - itr += DIV_ROUND_UP(85 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_10GB: - default: - itr += DIV_ROUND_UP(170 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - } - - if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { - itr &= ICE_ITR_ADAPTIVE_LATENCY; - itr += ICE_ITR_ADAPTIVE_MAX_USECS; - } + struct ice_ring_container *tx = &q_vector->tx; + struct ice_ring_container *rx = &q_vector->rx; - return itr; -} + if (ITR_IS_DYNAMIC(tx)) { + struct dim_sample dim_sample = {}; + u64 packets = 0, bytes = 0; + struct ice_ring *ring; -/** - * ice_update_itr - update the adaptive ITR value based on statistics - * @q_vector: structure containing interrupt and ring information - * @rc: structure containing ring performance data - * - * Stores a new ITR value based on packets and byte - * counts during the last interrupt. The advantage of per interrupt - * computation is faster updates and more accurate ITR for the current - * traffic pattern. Constants in this function were computed - * based on theoretical maximum wire speed and thresholds were set based - * on testing data as well as attempting to minimize response time - * while increasing bulk throughput. - */ -static void -ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc) -{ - unsigned long next_update = jiffies; - unsigned int packets, bytes, itr; - bool container_is_rx; + ice_for_each_ring(ring, q_vector->tx) { + packets += ring->stats.pkts; + bytes += ring->stats.bytes; + } - if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting)) - return; + dim_update_sample(q_vector->total_events, packets, bytes, + &dim_sample); - /* If itr_countdown is set it means we programmed an ITR within - * the last 4 interrupt cycles. This has a side effect of us - * potentially firing an early interrupt. In order to work around - * this we need to throw out any data received for a few - * interrupts following the update. - */ - if (q_vector->itr_countdown) { - itr = rc->target_itr; - goto clear_counts; + net_dim(&tx->dim, dim_sample); } - container_is_rx = (&q_vector->rx == rc); - /* For Rx we want to push the delay up and default to low latency. - * for Tx we want to pull the delay down and default to high latency. - */ - itr = container_is_rx ? - ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY : - ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY; - - /* If we didn't update within up to 1 - 2 jiffies we can assume - * that either packets are coming in so slow there hasn't been - * any work, or that there is so much work that NAPI is dealing - * with interrupt moderation and we don't need to do anything. - */ - if (time_after(next_update, rc->next_update)) - goto clear_counts; - - prefetch(q_vector->vsi->port_info); - - packets = rc->total_pkts; - bytes = rc->total_bytes; - - if (container_is_rx) { - /* If Rx there are 1 to 4 packets and bytes are less than - * 9000 assume insufficient data to use bulk rate limiting - * approach unless Tx is already in bulk rate limiting. We - * are likely latency driven. - */ - if (packets && packets < 4 && bytes < 9000 && - (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) { - itr = ICE_ITR_ADAPTIVE_LATENCY; - goto adjust_by_size_and_speed; - } - } else if (packets < 4) { - /* If we have Tx and Rx ITR maxed and Tx ITR is running in - * bulk mode and we are receiving 4 or fewer packets just - * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so - * that the Rx can relax. - */ - if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS && - (q_vector->rx.target_itr & ICE_ITR_MASK) == - ICE_ITR_ADAPTIVE_MAX_USECS) - goto clear_counts; - } else if (packets > 32) { - /* If we have processed over 32 packets in a single interrupt - * for Tx assume we need to switch over to "bulk" mode. - */ - rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY; - } + if (ITR_IS_DYNAMIC(rx)) { + struct dim_sample dim_sample = {}; + u64 packets = 0, bytes = 0; + struct ice_ring *ring; - /* We have no packets to actually measure against. This means - * either one of the other queues on this vector is active or - * we are a Tx queue doing TSO with too high of an interrupt rate. - * - * Between 4 and 56 we can assume that our current interrupt delay - * is only slightly too low. As such we should increase it by a small - * fixed amount. - */ - if (packets < 56) { - itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC; - if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { - itr &= ICE_ITR_ADAPTIVE_LATENCY; - itr += ICE_ITR_ADAPTIVE_MAX_USECS; + ice_for_each_ring(ring, q_vector->rx) { + packets += ring->stats.pkts; + bytes += ring->stats.bytes; } - goto clear_counts; - } - - if (packets <= 256) { - itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); - itr &= ICE_ITR_MASK; - - /* Between 56 and 112 is our "goldilocks" zone where we are - * working out "just right". Just report that our current - * ITR is good for us. - */ - if (packets <= 112) - goto clear_counts; - /* If packet count is 128 or greater we are likely looking - * at a slight overrun of the delay we want. Try halving - * our delay to see if that will cut the number of packets - * in half per interrupt. - */ - itr >>= 1; - itr &= ICE_ITR_MASK; - if (itr < ICE_ITR_ADAPTIVE_MIN_USECS) - itr = ICE_ITR_ADAPTIVE_MIN_USECS; + dim_update_sample(q_vector->total_events, packets, bytes, + &dim_sample); - goto clear_counts; + net_dim(&rx->dim, dim_sample); } - - /* The paths below assume we are dealing with a bulk ITR since - * number of packets is greater than 256. We are just going to have - * to compute a value and try to bring the count under control, - * though for smaller packet sizes there isn't much we can do as - * NAPI polling will likely be kicking in sooner rather than later. - */ - itr = ICE_ITR_ADAPTIVE_BULK; - -adjust_by_size_and_speed: - - /* based on checks above packets cannot be 0 so division is safe */ - itr = ice_adjust_itr_by_size_and_speed(q_vector->vsi->port_info, - bytes / packets, itr); - -clear_counts: - /* write back value */ - rc->target_itr = itr; - - /* next update should occur within next jiffy */ - rc->next_update = next_update + 1; - - rc->total_bytes = 0; - rc->total_pkts = 0; } /** @@ -1456,72 +1290,46 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)); } -/* The act of updating the ITR will cause it to immediately trigger. In order - * to prevent this from throwing off adaptive update statistics we defer the - * update so that it can only happen so often. So after either Tx or Rx are - * updated we make the adaptive scheme wait until either the ITR completely - * expires via the next_update expiration or we have been through at least - * 3 interrupts. - */ -#define ITR_COUNTDOWN_START 3 - /** - * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt - * @q_vector: q_vector for which ITR is being updated and interrupt enabled + * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt + * @q_vector: the vector associated with the interrupt to enable + * + * Update the net_dim() algorithm and re-enable the interrupt associated with + * this vector. + * + * If the VSI is down, the interrupt will not be re-enabled. */ static void ice_update_ena_itr(struct ice_q_vector *q_vector) { - struct ice_ring_container *tx = &q_vector->tx; - struct ice_ring_container *rx = &q_vector->rx; struct ice_vsi *vsi = q_vector->vsi; + bool wb_en = q_vector->wb_on_itr; u32 itr_val; - /* when exiting WB_ON_ITR just reset the countdown and let ITR - * resume it's normal "interrupts-enabled" path - */ - if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) - q_vector->itr_countdown = 0; - - /* This will do nothing if dynamic updates are not enabled */ - ice_update_itr(q_vector, tx); - ice_update_itr(q_vector, rx); + if (test_bit(ICE_DOWN, vsi->state)) + return; - /* This block of logic allows us to get away with only updating - * one ITR value with each interrupt. The idea is to perform a - * pseudo-lazy update with the following criteria. - * - * 1. Rx is given higher priority than Tx if both are in same state - * 2. If we must reduce an ITR that is given highest priority. - * 3. We then give priority to increasing ITR based on amount. + /* When exiting WB_ON_ITR, let ITR resume its normal + * interrupts-enabled path. */ - if (rx->target_itr < rx->current_itr) { - /* Rx ITR needs to be reduced, this is highest priority */ - itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); - rx->current_itr = rx->target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else if ((tx->target_itr < tx->current_itr) || - ((rx->target_itr - rx->current_itr) < - (tx->target_itr - tx->current_itr))) { - /* Tx ITR needs to be reduced, this is second priority - * Tx ITR needs to be increased more than Rx, fourth priority - */ - itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr); - tx->current_itr = tx->target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else if (rx->current_itr != rx->target_itr) { - /* Rx ITR needs to be increased, third priority */ - itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); - rx->current_itr = rx->target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else { - /* Still have to re-enable the interrupts */ - itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); - if (q_vector->itr_countdown) - q_vector->itr_countdown--; + if (wb_en) + q_vector->wb_on_itr = false; + + /* This will do nothing if dynamic updates are not enabled. */ + ice_net_dim(q_vector); + + /* net_dim() updates ITR out-of-band using a work item */ + itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + /* trigger an immediate software interrupt when exiting + * busy poll, to make sure to catch any pending cleanups + * that might have been missed due to interrupt state + * transition. + */ + if (wb_en) { + itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M | + GLINT_DYN_CTL_SW_ITR_INDX_M | + GLINT_DYN_CTL_SW_ITR_INDX_ENA_M; } - - if (!test_bit(ICE_VSI_DOWN, vsi->state)) - wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); } /** @@ -1543,7 +1351,7 @@ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) struct ice_vsi *vsi = q_vector->vsi; /* already in wb_on_itr mode no need to change it */ - if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) + if (q_vector->wb_on_itr) return; /* use previously set ITR values for all of the ITR indices by @@ -1555,7 +1363,7 @@ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | GLINT_DYN_CTL_WB_ON_ITR_M); - q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE; + q_vector->wb_on_itr = true; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index ffe0d271dec7..c5a92ac787d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -192,7 +192,6 @@ struct ice_rxq_stats { u64 non_eop_descs; u64 alloc_page_failed; u64 alloc_buf_failed; - u64 gro_dropped; /* GRO returned dropped */ }; enum ice_ring_state_t { @@ -224,23 +223,20 @@ enum ice_rx_dtype { #define ICE_TX_ITR ICE_IDX_ITR1 #define ICE_ITR_8K 124 #define ICE_ITR_20K 50 -#define ICE_ITR_MAX 8160 -#define ICE_DFLT_TX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC) -#define ICE_DFLT_RX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC) -#define ICE_ITR_DYNAMIC 0x8000 /* used as flag for itr_setting */ -#define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC)) -#define ITR_TO_REG(setting) ((setting) & ~ICE_ITR_DYNAMIC) +#define ICE_ITR_MAX 8160 /* 0x1FE0 */ +#define ICE_DFLT_TX_ITR ICE_ITR_20K +#define ICE_DFLT_RX_ITR ICE_ITR_20K +enum ice_dynamic_itr { + ITR_STATIC = 0, + ITR_DYNAMIC = 1 +}; + +#define ITR_IS_DYNAMIC(rc) ((rc)->itr_mode == ITR_DYNAMIC) #define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */ #define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S) #define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ #define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK) -#define ICE_ITR_ADAPTIVE_MIN_INC 0x0002 -#define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002 -#define ICE_ITR_ADAPTIVE_MAX_USECS 0x00FA -#define ICE_ITR_ADAPTIVE_LATENCY 0x8000 -#define ICE_ITR_ADAPTIVE_BULK 0x0000 - #define ICE_DFLT_INTRL 0 #define ICE_MAX_INTRL 236 @@ -340,17 +336,14 @@ static inline bool ice_ring_is_xdp(struct ice_ring *ring) struct ice_ring_container { /* head of linked-list of rings */ struct ice_ring *ring; - unsigned long next_update; /* jiffies value of next queue update */ - unsigned int total_bytes; /* total bytes processed this int */ - unsigned int total_pkts; /* total packets processed this int */ + struct dim dim; /* data for net_dim algorithm */ u16 itr_idx; /* index in the interrupt vector */ - u16 target_itr; /* value in usecs divided by the hw->itr_gran */ - u16 current_itr; /* value in usecs divided by the hw->itr_gran */ - /* high bit set means dynamic ITR, rest is used to store user - * readable ITR value in usecs and must be converted before programming - * to a register. + /* this matches the maximum number of ITR bits, but in usec + * values, so it is shifted left one bit (bit zero is ignored) */ - u16 itr_setting; + u16 itr_setting:13; + u16 itr_reserved:2; + u16 itr_mode:1; }; struct ice_coalesce_stored { diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 276ebcc309dc..9b80962ff92f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -551,9 +551,7 @@ struct ice_dcb_app_priority_table { #define ICE_TLV_STATUS_OPER 0x1 #define ICE_TLV_STATUS_SYNC 0x2 #define ICE_TLV_STATUS_ERR 0x4 -#define ICE_APP_PROT_ID_FCOE 0x8906 -#define ICE_APP_PROT_ID_ISCSI 0x0cbc -#define ICE_APP_PROT_ID_FIP 0x8914 +#define ICE_APP_PROT_ID_ISCSI_860 0x035c #define ICE_APP_SEL_ETHTYPE 0x1 #define ICE_APP_SEL_TCPIP 0x2 #define ICE_CEE_APP_SEL_ETHTYPE 0x0 @@ -941,4 +939,9 @@ struct ice_aq_get_set_rss_lut_params { #define ICE_FW_API_LLDP_FLTR_MIN 7 #define ICE_FW_API_LLDP_FLTR_PATCH 1 +/* AQ API version for report default configuration */ +#define ICE_FW_API_REPORT_DFLT_CFG_MAJ 1 +#define ICE_FW_API_REPORT_DFLT_CFG_MIN 7 +#define ICE_FW_API_REPORT_DFLT_CFG_PATCH 3 + #endif /* _ICE_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 1f4ba38b1599..eee180d8c024 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1548,7 +1548,7 @@ static void ice_vf_fdir_timer(struct timer_list *t) ctx_done->v_opcode = ctx_irq->v_opcode; spin_unlock_irqrestore(&fdir->ctx_lock, flags); - set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state); + set_bit(ICE_FD_VF_FLUSH_CTX, pf->state); ice_service_task_schedule(pf); } @@ -1596,7 +1596,7 @@ ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, if (!ret) dev_err(dev, "VF %d: Unexpected inactive timer!\n", vf->vf_id); - set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state); + set_bit(ICE_FD_VF_FLUSH_CTX, pf->state); ice_service_task_schedule(pf); } @@ -1847,7 +1847,7 @@ void ice_flush_fdir_ctx(struct ice_pf *pf) { int i; - if (!test_and_clear_bit(__ICE_FD_VF_FLUSH_CTX, pf->state)) + if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state)) return; ice_for_each_vf(pf, i) { diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index e68d52a6b11d..e38d4adc5b8d 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -371,7 +371,7 @@ void ice_free_vfs(struct ice_pf *pf) if (!pf->vf) return; - while (test_and_set_bit(__ICE_VF_DIS, pf->state)) + while (test_and_set_bit(ICE_VF_DIS, pf->state)) usleep_range(1000, 2000); /* Disable IOV before freeing resources. This lets any VF drivers @@ -424,7 +424,7 @@ void ice_free_vfs(struct ice_pf *pf) wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); } } - clear_bit(__ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DIS, pf->state); clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); } @@ -1258,7 +1258,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) return false; /* If VFs have been disabled, there is no need to reset */ - if (test_and_set_bit(__ICE_VF_DIS, pf->state)) + if (test_and_set_bit(ICE_VF_DIS, pf->state)) return false; /* Begin reset on all VFs at once */ @@ -1314,7 +1314,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) } ice_flush(hw); - clear_bit(__ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DIS, pf->state); return true; } @@ -1334,7 +1334,7 @@ static bool ice_is_vf_disabled(struct ice_vf *vf) * means something else is resetting the VF, so we shouldn't continue. * Otherwise, set disable VF state bit for actual reset, and continue. */ - return (test_bit(__ICE_VF_DIS, pf->state) || + return (test_bit(ICE_VF_DIS, pf->state) || test_bit(ICE_VF_STATE_DIS, vf->vf_states)); } @@ -1359,7 +1359,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) dev = ice_pf_to_dev(pf); - if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) { + if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n", vf->vf_id); return true; @@ -1651,7 +1651,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) /* Disable global interrupt 0 so we don't try to handle the VFLR. */ wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); - set_bit(__ICE_OICR_INTR_DIS, pf->state); + set_bit(ICE_OICR_INTR_DIS, pf->state); ice_flush(hw); ret = pci_enable_sriov(pf->pdev, num_vfs); @@ -1679,7 +1679,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) goto err_unroll_sriov; } - clear_bit(__ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DIS, pf->state); return 0; err_unroll_sriov: @@ -1691,7 +1691,7 @@ err_pci_disable_sriov: err_unroll_intr: /* rearm interrupts here */ ice_irq_dynamic_ena(hw, NULL, NULL); - clear_bit(__ICE_OICR_INTR_DIS, pf->state); + clear_bit(ICE_OICR_INTR_DIS, pf->state); return ret; } @@ -1809,7 +1809,7 @@ void ice_process_vflr_event(struct ice_pf *pf) unsigned int vf_id; u32 reg; - if (!test_and_clear_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || + if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) || !pf->num_alloc_vfs) return; @@ -4194,7 +4194,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) int i; /* check that there are pending MDD events to print */ - if (!test_and_clear_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state)) + if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state)) return; /* VF MDD event logs are rate limited to one second intervals */ @@ -4234,7 +4234,6 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) */ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { - struct pci_dev *vfdev; u16 vf_id; int pos; @@ -4243,6 +4242,8 @@ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); if (pos) { + struct pci_dev *vfdev; + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); vfdev = pci_get_device(pdev->vendor, vf_id, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 17ab8ef024ad..faa7b8d96adb 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -108,9 +108,6 @@ ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) ice_cfg_itr(hw, q_vector); - wr32(hw, GLINT_RATE(reg_idx), - ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); - ice_for_each_ring(ring, q_vector->tx) ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx, q_vector->tx.itr_idx); @@ -159,7 +156,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) rx_ring = vsi->rx_rings[q_idx]; q_vector = rx_ring->q_vector; - while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) { timeout--; if (!timeout) return -EBUSY; @@ -249,7 +246,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; - clear_bit(__ICE_CFG_BUSY, vsi->state); + clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); @@ -758,7 +755,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_ring *ring; - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_DOWN, vsi->state)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index d2e2c50ce257..ca5429774994 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -340,10 +340,10 @@ #define I210_RXPBSIZE_PB_32KB 0x00000020 #define I210_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ #define I210_TXPBSIZE_MASK 0xC0FFFFFF -#define I210_TXPBSIZE_PB0_8KB (8 << 0) -#define I210_TXPBSIZE_PB1_8KB (8 << 6) -#define I210_TXPBSIZE_PB2_4KB (4 << 12) -#define I210_TXPBSIZE_PB3_4KB (4 << 18) +#define I210_TXPBSIZE_PB0_6KB (6 << 0) +#define I210_TXPBSIZE_PB1_6KB (6 << 6) +#define I210_TXPBSIZE_PB2_6KB (6 << 12) +#define I210_TXPBSIZE_PB3_6KB (6 << 18) #define I210_DTXMXPKTSZ_DEFAULT 0x00000098 diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index fd8eb2f9ab9d..e63ee3cca5ea 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -484,6 +484,31 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) } /** + * igb_i21x_hw_doublecheck - double checks potential HW issue in i21X + * @hw: pointer to the HW structure + * + * Checks if multicast array is wrote correctly + * If not then rewrites again to register + **/ +static void igb_i21x_hw_doublecheck(struct e1000_hw *hw) +{ + bool is_failed; + int i; + + do { + is_failed = false; + for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) { + if (array_rd32(E1000_MTA, i) != hw->mac.mta_shadow[i]) { + is_failed = true; + array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); + wrfl(); + break; + } + } + } while (is_failed); +} + +/** * igb_update_mc_addr_list - Update Multicast addresses * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program @@ -516,6 +541,8 @@ void igb_update_mc_addr_list(struct e1000_hw *hw, for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); wrfl(); + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) + igb_i21x_hw_doublecheck(hw); } /** diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c9e8c65a3cfe..038a9fd1af44 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1921,8 +1921,8 @@ static void igb_setup_tx_mode(struct igb_adapter *adapter) */ val = rd32(E1000_TXPBS); val &= ~I210_TXPBSIZE_MASK; - val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB | - I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB; + val |= I210_TXPBSIZE_PB0_6KB | I210_TXPBSIZE_PB1_6KB | + I210_TXPBSIZE_PB2_6KB | I210_TXPBSIZE_PB3_6KB; wr32(E1000_TXPBS, val); val = rd32(E1000_RXPBS); diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 91493a73355d..25871351730b 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -28,6 +28,11 @@ void igc_ethtool_set_ops(struct net_device *); #define MAX_ETYPE_FILTER 8 #define IGC_RETA_SIZE 128 +/* SDP support */ +#define IGC_N_EXTTS 2 +#define IGC_N_PEROUT 2 +#define IGC_N_SDP 4 + enum igc_mac_filter_type { IGC_MAC_FILTER_TYPE_DST = 0, IGC_MAC_FILTER_TYPE_SRC @@ -223,6 +228,14 @@ struct igc_adapter { char fw_version[32]; struct bpf_prog *xdp_prog; + + bool pps_sys_wrap_on; + + struct ptp_pin_desc sdp_config[IGC_N_SDP]; + struct { + struct timespec64 start; + struct timespec64 period; + } perout[IGC_N_PEROUT]; }; void igc_up(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 35ed997af075..0103dda32f39 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -8,6 +8,8 @@ #define REQ_TX_DESCRIPTOR_MULTIPLE 8 #define REQ_RX_DESCRIPTOR_MULTIPLE 8 +#define IGC_CTRL_EXT_SDP2_DIR 0x00000400 /* SDP2 Data direction */ +#define IGC_CTRL_EXT_SDP3_DIR 0x00000800 /* SDP3 Data direction */ #define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ /* Definitions for power management and wakeup registers */ @@ -96,6 +98,9 @@ #define IGC_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ #define IGC_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ +#define IGC_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ +#define IGC_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ + /* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */ #define MAX_JUMBO_FRAME_SIZE 0x2600 @@ -403,6 +408,64 @@ #define IGC_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ #define IGC_TSYNCTXCTL_TXSYNSIG 0x00000020 /* Sample TX tstamp in PHY sop */ +/* Timer selection bits */ +#define IGC_AUX_IO_TIMER_SEL_SYSTIM0 (0u << 30) /* Select SYSTIM0 for auxiliary time stamp */ +#define IGC_AUX_IO_TIMER_SEL_SYSTIM1 (1u << 30) /* Select SYSTIM1 for auxiliary time stamp */ +#define IGC_AUX_IO_TIMER_SEL_SYSTIM2 (2u << 30) /* Select SYSTIM2 for auxiliary time stamp */ +#define IGC_AUX_IO_TIMER_SEL_SYSTIM3 (3u << 30) /* Select SYSTIM3 for auxiliary time stamp */ +#define IGC_TT_IO_TIMER_SEL_SYSTIM0 (0u << 30) /* Select SYSTIM0 for target time stamp */ +#define IGC_TT_IO_TIMER_SEL_SYSTIM1 (1u << 30) /* Select SYSTIM1 for target time stamp */ +#define IGC_TT_IO_TIMER_SEL_SYSTIM2 (2u << 30) /* Select SYSTIM2 for target time stamp */ +#define IGC_TT_IO_TIMER_SEL_SYSTIM3 (3u << 30) /* Select SYSTIM3 for target time stamp */ + +/* TSAUXC Configuration Bits */ +#define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */ +#define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */ +#define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */ +#define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */ +#define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */ +#define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */ +#define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. */ +#define IGC_TSAUXC_AUTT1 BIT(11) /* Auxiliary Timestamp Taken. */ +#define IGC_TSAUXC_PLSG BIT(17) /* Generate a pulse. */ +#define IGC_TSAUXC_DISABLE1 BIT(27) /* Disable SYSTIM0 Count Operation. */ +#define IGC_TSAUXC_DISABLE2 BIT(28) /* Disable SYSTIM1 Count Operation. */ +#define IGC_TSAUXC_DISABLE3 BIT(29) /* Disable SYSTIM2 Count Operation. */ +#define IGC_TSAUXC_DIS_TS_CLEAR BIT(30) /* Disable EN_TT0/1 auto clear. */ +#define IGC_TSAUXC_DISABLE0 BIT(31) /* Disable SYSTIM0 Count Operation. */ + +/* SDP Configuration Bits */ +#define IGC_AUX0_SEL_SDP0 (0u << 0) /* Assign SDP0 to auxiliary time stamp 0. */ +#define IGC_AUX0_SEL_SDP1 (1u << 0) /* Assign SDP1 to auxiliary time stamp 0. */ +#define IGC_AUX0_SEL_SDP2 (2u << 0) /* Assign SDP2 to auxiliary time stamp 0. */ +#define IGC_AUX0_SEL_SDP3 (3u << 0) /* Assign SDP3 to auxiliary time stamp 0. */ +#define IGC_AUX0_TS_SDP_EN (1u << 2) /* Enable auxiliary time stamp trigger 0. */ +#define IGC_AUX1_SEL_SDP0 (0u << 3) /* Assign SDP0 to auxiliary time stamp 1. */ +#define IGC_AUX1_SEL_SDP1 (1u << 3) /* Assign SDP1 to auxiliary time stamp 1. */ +#define IGC_AUX1_SEL_SDP2 (2u << 3) /* Assign SDP2 to auxiliary time stamp 1. */ +#define IGC_AUX1_SEL_SDP3 (3u << 3) /* Assign SDP3 to auxiliary time stamp 1. */ +#define IGC_AUX1_TS_SDP_EN (1u << 5) /* Enable auxiliary time stamp trigger 1. */ +#define IGC_TS_SDP0_SEL_TT0 (0u << 6) /* Target time 0 is output on SDP0. */ +#define IGC_TS_SDP0_SEL_TT1 (1u << 6) /* Target time 1 is output on SDP0. */ +#define IGC_TS_SDP0_SEL_FC0 (2u << 6) /* Freq clock 0 is output on SDP0. */ +#define IGC_TS_SDP0_SEL_FC1 (3u << 6) /* Freq clock 1 is output on SDP0. */ +#define IGC_TS_SDP0_EN (1u << 8) /* SDP0 is assigned to Tsync. */ +#define IGC_TS_SDP1_SEL_TT0 (0u << 9) /* Target time 0 is output on SDP1. */ +#define IGC_TS_SDP1_SEL_TT1 (1u << 9) /* Target time 1 is output on SDP1. */ +#define IGC_TS_SDP1_SEL_FC0 (2u << 9) /* Freq clock 0 is output on SDP1. */ +#define IGC_TS_SDP1_SEL_FC1 (3u << 9) /* Freq clock 1 is output on SDP1. */ +#define IGC_TS_SDP1_EN (1u << 11) /* SDP1 is assigned to Tsync. */ +#define IGC_TS_SDP2_SEL_TT0 (0u << 12) /* Target time 0 is output on SDP2. */ +#define IGC_TS_SDP2_SEL_TT1 (1u << 12) /* Target time 1 is output on SDP2. */ +#define IGC_TS_SDP2_SEL_FC0 (2u << 12) /* Freq clock 0 is output on SDP2. */ +#define IGC_TS_SDP2_SEL_FC1 (3u << 12) /* Freq clock 1 is output on SDP2. */ +#define IGC_TS_SDP2_EN (1u << 14) /* SDP2 is assigned to Tsync. */ +#define IGC_TS_SDP3_SEL_TT0 (0u << 15) /* Target time 0 is output on SDP3. */ +#define IGC_TS_SDP3_SEL_TT1 (1u << 15) /* Target time 1 is output on SDP3. */ +#define IGC_TS_SDP3_SEL_FC0 (2u << 15) /* Freq clock 0 is output on SDP3. */ +#define IGC_TS_SDP3_SEL_FC1 (3u << 15) /* Freq clock 1 is output on SDP3. */ +#define IGC_TS_SDP3_EN (1u << 17) /* SDP3 is assigned to Tsync. */ + /* Transmit Scheduling */ #define IGC_TQAVCTRL_TRANSMIT_MODE_TSN 0x00000001 #define IGC_TQAVCTRL_ENHANCED_QAV 0x00000008 diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 8722294ab90c..9722449d7633 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -65,6 +65,8 @@ static const struct igc_stats igc_gstrings_stats[] = { IGC_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts), IGC_STAT("tx_hwtstamp_skipped", tx_hwtstamp_skipped), IGC_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), + IGC_STAT("tx_lpi_counter", stats.tlpic), + IGC_STAT("rx_lpi_counter", stats.rlpic), }; #define IGC_NETDEV_STAT(_net_stat) { \ diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index cc83bb5c15e8..b2ef9fde97b3 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -229,10 +229,11 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) || words == 0) { hw_dbg("nvm parameter(s) out of bounds\n"); - goto out; + return ret_val; } for (i = 0; i < words; i++) { + ret_val = -IGC_ERR_NVM; eewr = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) | (data[i] << IGC_NVM_RW_REG_DATA) | IGC_NVM_RW_REG_START; @@ -254,7 +255,6 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, } } -out: return ret_val; } diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 10765491e357..069471b7ffb0 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -4250,9 +4250,20 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, static void igc_tsync_interrupt(struct igc_adapter *adapter) { + u32 ack, tsauxc, sec, nsec, tsicr; struct igc_hw *hw = &adapter->hw; - u32 tsicr = rd32(IGC_TSICR); - u32 ack = 0; + struct ptp_clock_event event; + struct timespec64 ts; + + tsicr = rd32(IGC_TSICR); + ack = 0; + + if (tsicr & IGC_TSICR_SYS_WRAP) { + event.type = PTP_CLOCK_PPS; + if (adapter->ptp_caps.pps) + ptp_clock_event(adapter->ptp_clock, &event); + ack |= IGC_TSICR_SYS_WRAP; + } if (tsicr & IGC_TSICR_TXTS) { /* retrieve hardware timestamp */ @@ -4260,6 +4271,54 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) ack |= IGC_TSICR_TXTS; } + if (tsicr & IGC_TSICR_TT0) { + spin_lock(&adapter->tmreg_lock); + ts = timespec64_add(adapter->perout[0].start, + adapter->perout[0].period); + wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec); + tsauxc = rd32(IGC_TSAUXC); + tsauxc |= IGC_TSAUXC_EN_TT0; + wr32(IGC_TSAUXC, tsauxc); + adapter->perout[0].start = ts; + spin_unlock(&adapter->tmreg_lock); + ack |= IGC_TSICR_TT0; + } + + if (tsicr & IGC_TSICR_TT1) { + spin_lock(&adapter->tmreg_lock); + ts = timespec64_add(adapter->perout[1].start, + adapter->perout[1].period); + wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec); + tsauxc = rd32(IGC_TSAUXC); + tsauxc |= IGC_TSAUXC_EN_TT1; + wr32(IGC_TSAUXC, tsauxc); + adapter->perout[1].start = ts; + spin_unlock(&adapter->tmreg_lock); + ack |= IGC_TSICR_TT1; + } + + if (tsicr & IGC_TSICR_AUTT0) { + nsec = rd32(IGC_AUXSTMPL0); + sec = rd32(IGC_AUXSTMPH0); + event.type = PTP_CLOCK_EXTTS; + event.index = 0; + event.timestamp = sec * NSEC_PER_SEC + nsec; + ptp_clock_event(adapter->ptp_clock, &event); + ack |= IGC_TSICR_AUTT0; + } + + if (tsicr & IGC_TSICR_AUTT1) { + nsec = rd32(IGC_AUXSTMPL1); + sec = rd32(IGC_AUXSTMPH1); + event.type = PTP_CLOCK_EXTTS; + event.index = 1; + event.timestamp = sec * NSEC_PER_SEC + nsec; + ptp_clock_event(adapter->ptp_clock, &event); + ack |= IGC_TSICR_AUTT1; + } + /* acknowledge the interrupts */ wr32(IGC_TSICR, ack); } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index dfa3b247fcd8..69617d2c1be2 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -120,12 +120,289 @@ static int igc_ptp_settime_i225(struct ptp_clock_info *ptp, return 0; } +static void igc_pin_direction(int pin, int input, u32 *ctrl, u32 *ctrl_ext) +{ + u32 *ptr = pin < 2 ? ctrl : ctrl_ext; + static const u32 mask[IGC_N_SDP] = { + IGC_CTRL_SDP0_DIR, + IGC_CTRL_SDP1_DIR, + IGC_CTRL_EXT_SDP2_DIR, + IGC_CTRL_EXT_SDP3_DIR, + }; + + if (input) + *ptr &= ~mask[pin]; + else + *ptr |= mask[pin]; +} + +static void igc_pin_perout(struct igc_adapter *igc, int chan, int pin, int freq) +{ + static const u32 igc_aux0_sel_sdp[IGC_N_SDP] = { + IGC_AUX0_SEL_SDP0, IGC_AUX0_SEL_SDP1, IGC_AUX0_SEL_SDP2, IGC_AUX0_SEL_SDP3, + }; + static const u32 igc_aux1_sel_sdp[IGC_N_SDP] = { + IGC_AUX1_SEL_SDP0, IGC_AUX1_SEL_SDP1, IGC_AUX1_SEL_SDP2, IGC_AUX1_SEL_SDP3, + }; + static const u32 igc_ts_sdp_en[IGC_N_SDP] = { + IGC_TS_SDP0_EN, IGC_TS_SDP1_EN, IGC_TS_SDP2_EN, IGC_TS_SDP3_EN, + }; + static const u32 igc_ts_sdp_sel_tt0[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_TT0, IGC_TS_SDP1_SEL_TT0, + IGC_TS_SDP2_SEL_TT0, IGC_TS_SDP3_SEL_TT0, + }; + static const u32 igc_ts_sdp_sel_tt1[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_TT1, IGC_TS_SDP1_SEL_TT1, + IGC_TS_SDP2_SEL_TT1, IGC_TS_SDP3_SEL_TT1, + }; + static const u32 igc_ts_sdp_sel_fc0[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_FC0, IGC_TS_SDP1_SEL_FC0, + IGC_TS_SDP2_SEL_FC0, IGC_TS_SDP3_SEL_FC0, + }; + static const u32 igc_ts_sdp_sel_fc1[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_FC1, IGC_TS_SDP1_SEL_FC1, + IGC_TS_SDP2_SEL_FC1, IGC_TS_SDP3_SEL_FC1, + }; + static const u32 igc_ts_sdp_sel_clr[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_FC1, IGC_TS_SDP1_SEL_FC1, + IGC_TS_SDP2_SEL_FC1, IGC_TS_SDP3_SEL_FC1, + }; + struct igc_hw *hw = &igc->hw; + u32 ctrl, ctrl_ext, tssdp = 0; + + ctrl = rd32(IGC_CTRL); + ctrl_ext = rd32(IGC_CTRL_EXT); + tssdp = rd32(IGC_TSSDP); + + igc_pin_direction(pin, 0, &ctrl, &ctrl_ext); + + /* Make sure this pin is not enabled as an input. */ + if ((tssdp & IGC_AUX0_SEL_SDP3) == igc_aux0_sel_sdp[pin]) + tssdp &= ~IGC_AUX0_TS_SDP_EN; + + if ((tssdp & IGC_AUX1_SEL_SDP3) == igc_aux1_sel_sdp[pin]) + tssdp &= ~IGC_AUX1_TS_SDP_EN; + + tssdp &= ~igc_ts_sdp_sel_clr[pin]; + if (freq) { + if (chan == 1) + tssdp |= igc_ts_sdp_sel_fc1[pin]; + else + tssdp |= igc_ts_sdp_sel_fc0[pin]; + } else { + if (chan == 1) + tssdp |= igc_ts_sdp_sel_tt1[pin]; + else + tssdp |= igc_ts_sdp_sel_tt0[pin]; + } + tssdp |= igc_ts_sdp_en[pin]; + + wr32(IGC_TSSDP, tssdp); + wr32(IGC_CTRL, ctrl); + wr32(IGC_CTRL_EXT, ctrl_ext); +} + +static void igc_pin_extts(struct igc_adapter *igc, int chan, int pin) +{ + static const u32 igc_aux0_sel_sdp[IGC_N_SDP] = { + IGC_AUX0_SEL_SDP0, IGC_AUX0_SEL_SDP1, IGC_AUX0_SEL_SDP2, IGC_AUX0_SEL_SDP3, + }; + static const u32 igc_aux1_sel_sdp[IGC_N_SDP] = { + IGC_AUX1_SEL_SDP0, IGC_AUX1_SEL_SDP1, IGC_AUX1_SEL_SDP2, IGC_AUX1_SEL_SDP3, + }; + static const u32 igc_ts_sdp_en[IGC_N_SDP] = { + IGC_TS_SDP0_EN, IGC_TS_SDP1_EN, IGC_TS_SDP2_EN, IGC_TS_SDP3_EN, + }; + struct igc_hw *hw = &igc->hw; + u32 ctrl, ctrl_ext, tssdp = 0; + + ctrl = rd32(IGC_CTRL); + ctrl_ext = rd32(IGC_CTRL_EXT); + tssdp = rd32(IGC_TSSDP); + + igc_pin_direction(pin, 1, &ctrl, &ctrl_ext); + + /* Make sure this pin is not enabled as an output. */ + tssdp &= ~igc_ts_sdp_en[pin]; + + if (chan == 1) { + tssdp &= ~IGC_AUX1_SEL_SDP3; + tssdp |= igc_aux1_sel_sdp[pin] | IGC_AUX1_TS_SDP_EN; + } else { + tssdp &= ~IGC_AUX0_SEL_SDP3; + tssdp |= igc_aux0_sel_sdp[pin] | IGC_AUX0_TS_SDP_EN; + } + + wr32(IGC_TSSDP, tssdp); + wr32(IGC_CTRL, ctrl); + wr32(IGC_CTRL_EXT, ctrl_ext); +} + static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { + struct igc_adapter *igc = + container_of(ptp, struct igc_adapter, ptp_caps); + struct igc_hw *hw = &igc->hw; + unsigned long flags; + struct timespec64 ts; + int use_freq = 0, pin = -1; + u32 tsim, tsauxc, tsauxc_mask, tsim_mask, trgttiml, trgttimh, freqout; + s64 ns; + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) + return -EOPNOTSUPP; + + /* Reject requests failing to enable both edges. */ + if ((rq->extts.flags & PTP_STRICT_FLAGS) && + (rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + + if (on) { + pin = ptp_find_pin(igc->ptp_clock, PTP_PF_EXTTS, + rq->extts.index); + if (pin < 0) + return -EBUSY; + } + if (rq->extts.index == 1) { + tsauxc_mask = IGC_TSAUXC_EN_TS1; + tsim_mask = IGC_TSICR_AUTT1; + } else { + tsauxc_mask = IGC_TSAUXC_EN_TS0; + tsim_mask = IGC_TSICR_AUTT0; + } + spin_lock_irqsave(&igc->tmreg_lock, flags); + tsauxc = rd32(IGC_TSAUXC); + tsim = rd32(IGC_TSIM); + if (on) { + igc_pin_extts(igc, rq->extts.index, pin); + tsauxc |= tsauxc_mask; + tsim |= tsim_mask; + } else { + tsauxc &= ~tsauxc_mask; + tsim &= ~tsim_mask; + } + wr32(IGC_TSAUXC, tsauxc); + wr32(IGC_TSIM, tsim); + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + return 0; + + case PTP_CLK_REQ_PEROUT: + /* Reject requests with unsupported flags */ + if (rq->perout.flags) + return -EOPNOTSUPP; + + if (on) { + pin = ptp_find_pin(igc->ptp_clock, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + } + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec64_to_ns(&ts); + ns = ns >> 1; + if (on && (ns <= 70000000LL || ns == 125000000LL || + ns == 250000000LL || ns == 500000000LL)) { + if (ns < 8LL) + return -EINVAL; + use_freq = 1; + } + ts = ns_to_timespec64(ns); + if (rq->perout.index == 1) { + if (use_freq) { + tsauxc_mask = IGC_TSAUXC_EN_CLK1; + tsim_mask = 0; + } else { + tsauxc_mask = IGC_TSAUXC_EN_TT1; + tsim_mask = IGC_TSICR_TT1; + } + trgttiml = IGC_TRGTTIML1; + trgttimh = IGC_TRGTTIMH1; + freqout = IGC_FREQOUT1; + } else { + if (use_freq) { + tsauxc_mask = IGC_TSAUXC_EN_CLK0; + tsim_mask = 0; + } else { + tsauxc_mask = IGC_TSAUXC_EN_TT0; + tsim_mask = IGC_TSICR_TT0; + } + trgttiml = IGC_TRGTTIML0; + trgttimh = IGC_TRGTTIMH0; + freqout = IGC_FREQOUT0; + } + spin_lock_irqsave(&igc->tmreg_lock, flags); + tsauxc = rd32(IGC_TSAUXC); + tsim = rd32(IGC_TSIM); + if (rq->perout.index == 1) { + tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1); + tsim &= ~IGC_TSICR_TT1; + } else { + tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0); + tsim &= ~IGC_TSICR_TT0; + } + if (on) { + int i = rq->perout.index; + + igc_pin_perout(igc, i, pin, use_freq); + igc->perout[i].start.tv_sec = rq->perout.start.sec; + igc->perout[i].start.tv_nsec = rq->perout.start.nsec; + igc->perout[i].period.tv_sec = ts.tv_sec; + igc->perout[i].period.tv_nsec = ts.tv_nsec; + wr32(trgttimh, rq->perout.start.sec); + /* For now, always select timer 0 as source. */ + wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + if (use_freq) + wr32(freqout, ns); + tsauxc |= tsauxc_mask; + tsim |= tsim_mask; + } + wr32(IGC_TSAUXC, tsauxc); + wr32(IGC_TSIM, tsim); + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + return 0; + + case PTP_CLK_REQ_PPS: + spin_lock_irqsave(&igc->tmreg_lock, flags); + tsim = rd32(IGC_TSIM); + if (on) + tsim |= IGC_TSICR_SYS_WRAP; + else + tsim &= ~IGC_TSICR_SYS_WRAP; + igc->pps_sys_wrap_on = on; + wr32(IGC_TSIM, tsim); + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + return 0; + + default: + break; + } + return -EOPNOTSUPP; } +static int igc_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + case PTP_PF_PEROUT: + break; + case PTP_PF_PHYSYNC: + return -1; + } + return 0; +} + /** * igc_ptp_systim_to_hwtstamp - convert system time value to HW timestamp * @adapter: board private structure @@ -486,9 +763,17 @@ void igc_ptp_init(struct igc_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct igc_hw *hw = &adapter->hw; + int i; switch (hw->mac.type) { case igc_i225: + for (i = 0; i < IGC_N_SDP; i++) { + struct ptp_pin_desc *ppd = &adapter->sdp_config[i]; + + snprintf(ppd->name, sizeof(ppd->name), "SDP%d", i); + ppd->index = i; + ppd->func = PTP_PF_NONE; + } snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 62499999; @@ -497,6 +782,12 @@ void igc_ptp_init(struct igc_adapter *adapter) adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225; adapter->ptp_caps.settime64 = igc_ptp_settime_i225; adapter->ptp_caps.enable = igc_ptp_feature_enable_i225; + adapter->ptp_caps.pps = 1; + adapter->ptp_caps.pin_config = adapter->sdp_config; + adapter->ptp_caps.n_ext_ts = IGC_N_EXTTS; + adapter->ptp_caps.n_per_out = IGC_N_PEROUT; + adapter->ptp_caps.n_pins = IGC_N_SDP; + adapter->ptp_caps.verify = igc_ptp_verify_pin; break; default: adapter->ptp_clock = NULL; @@ -598,7 +889,9 @@ void igc_ptp_reset(struct igc_adapter *adapter) case igc_i225: wr32(IGC_TSAUXC, 0x0); wr32(IGC_TSSDP, 0x0); - wr32(IGC_TSIM, IGC_TSICR_INTERRUPTS); + wr32(IGC_TSIM, + IGC_TSICR_INTERRUPTS | + (adapter->pps_sys_wrap_on ? IGC_TSICR_SYS_WRAP : 0)); wr32(IGC_IMS, IGC_IMS_TS); break; default: diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 3e5cb7aef9da..cc174853554b 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -192,6 +192,16 @@ #define IGC_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ #define IGC_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ #define IGC_TSSDP 0x0003C /* Time Sync SDP Configuration Register - RW */ +#define IGC_TRGTTIML0 0x0B644 /* Target Time Register 0 Low - RW */ +#define IGC_TRGTTIMH0 0x0B648 /* Target Time Register 0 High - RW */ +#define IGC_TRGTTIML1 0x0B64C /* Target Time Register 1 Low - RW */ +#define IGC_TRGTTIMH1 0x0B650 /* Target Time Register 1 High - RW */ +#define IGC_FREQOUT0 0x0B654 /* Frequency Out 0 Control Register - RW */ +#define IGC_FREQOUT1 0x0B658 /* Frequency Out 1 Control Register - RW */ +#define IGC_AUXSTMPL0 0x0B65C /* Auxiliary Time Stamp 0 Register Low - RO */ +#define IGC_AUXSTMPH0 0x0B660 /* Auxiliary Time Stamp 0 Register High - RO */ +#define IGC_AUXSTMPL1 0x0B664 /* Auxiliary Time Stamp 1 Register Low - RO */ +#define IGC_AUXSTMPH1 0x0B668 /* Auxiliary Time Stamp 1 Register High - RO */ #define IGC_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ #define IGC_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index c00332d2e02a..72e6ebffea33 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -361,7 +361,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) } #ifdef IXGBE_FCOE - /* Reprogam FCoE hardware offloads when the traffic class + /* Reprogram FCoE hardware offloads when the traffic class * FCoE is using changes. This happens if the APP info * changes or the up2tc mapping is updated. */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7ba1c2985ef7..c5ec17d19c59 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6536,6 +6536,13 @@ err_setup_tx: return err; } +static int ixgbe_rx_napi_id(struct ixgbe_ring *rx_ring) +{ + struct ixgbe_q_vector *q_vector = rx_ring->q_vector; + + return q_vector ? q_vector->napi.napi_id : 0; +} + /** * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) * @adapter: pointer to ixgbe_adapter @@ -6583,7 +6590,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, /* XDP RX-queue info */ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, rx_ring->q_vector->napi.napi_id) < 0) + rx_ring->queue_index, ixgbe_rx_napi_id(rx_ring)) < 0) goto err; rx_ring->xdp_prog = adapter->xdp_prog; @@ -6892,6 +6899,11 @@ static int __maybe_unused ixgbe_resume(struct device *dev_d) adapter->hw.hw_addr = adapter->io_addr; + err = pci_enable_device_mem(pdev); + if (err) { + e_dev_err("Cannot enable PCI device from suspend\n"); + return err; + } smp_mb__before_atomic(); clear_bit(__IXGBE_DISABLED, &adapter->state); pci_set_master(pdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 73bc170d1ae9..24aa97f993ca 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -380,6 +380,9 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) case X557_PHY_ID2: phy_type = ixgbe_phy_x550em_ext_t; break; + case BCM54616S_E_PHY_ID: + phy_type = ixgbe_phy_ext_1g_t; + break; default: phy_type = ixgbe_phy_unknown; break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 2be1c4c72435..2647937f7f4d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1407,6 +1407,7 @@ struct ixgbe_nvm_version { #define QT2022_PHY_ID 0x0043A400 #define ATH_PHY_ID 0x03429050 #define AQ_FW_REV 0x20 +#define BCM54616S_E_PHY_ID 0x03625D10 /* Special PHY Init Routine */ #define IXGBE_PHY_INIT_OFFSET_NL 0x002B @@ -3383,10 +3384,6 @@ struct ixgbe_hw_stats { /* forward declaration */ struct ixgbe_hw; -/* iterator type for walking multicast address lists */ -typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr, - u32 *vmdq); - /* Function pointer table */ struct ixgbe_eeprom_operations { s32 (*init_params)(struct ixgbe_hw *); diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index d1e9e306653b..1d8209df4162 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -16,9 +16,6 @@ struct ixgbe_hw; -/* iterator type for walking multicast address lists */ -typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr, - u32 *vmdq); struct ixgbe_mac_operations { s32 (*init_hw)(struct ixgbe_hw *); s32 (*reset_hw)(struct ixgbe_hw *); diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 925161959b9b..6f987a7ffcb3 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -41,7 +41,10 @@ #include <linux/types.h> #include <linux/interrupt.h> #include <linux/ioport.h> +#include <linux/iopoll.h> #include <linux/in.h> +#include <linux/of_device.h> +#include <linux/of_net.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/delay.h> @@ -54,21 +57,246 @@ #include <linux/ethtool.h> #include <linux/crc32.h> #include <linux/pgtable.h> - -#include <asm/bootinfo.h> -#include <asm/bitops.h> -#include <asm/io.h> -#include <asm/dma.h> - -#include <asm/mach-rc32434/rb.h> -#include <asm/mach-rc32434/rc32434.h> -#include <asm/mach-rc32434/eth.h> -#include <asm/mach-rc32434/dma_v.h> +#include <linux/clk.h> #define DRV_NAME "korina" #define DRV_VERSION "0.20" #define DRV_RELDATE "15Sep2017" +struct eth_regs { + u32 ethintfc; + u32 ethfifott; + u32 etharc; + u32 ethhash0; + u32 ethhash1; + u32 ethu0[4]; /* Reserved. */ + u32 ethpfs; + u32 ethmcp; + u32 eth_u1[10]; /* Reserved. */ + u32 ethspare; + u32 eth_u2[42]; /* Reserved. */ + u32 ethsal0; + u32 ethsah0; + u32 ethsal1; + u32 ethsah1; + u32 ethsal2; + u32 ethsah2; + u32 ethsal3; + u32 ethsah3; + u32 ethrbc; + u32 ethrpc; + u32 ethrupc; + u32 ethrfc; + u32 ethtbc; + u32 ethgpf; + u32 eth_u9[50]; /* Reserved. */ + u32 ethmac1; + u32 ethmac2; + u32 ethipgt; + u32 ethipgr; + u32 ethclrt; + u32 ethmaxf; + u32 eth_u10; /* Reserved. */ + u32 ethmtest; + u32 miimcfg; + u32 miimcmd; + u32 miimaddr; + u32 miimwtd; + u32 miimrdd; + u32 miimind; + u32 eth_u11; /* Reserved. */ + u32 eth_u12; /* Reserved. */ + u32 ethcfsa0; + u32 ethcfsa1; + u32 ethcfsa2; +}; + +/* Ethernet interrupt registers */ +#define ETH_INT_FC_EN BIT(0) +#define ETH_INT_FC_ITS BIT(1) +#define ETH_INT_FC_RIP BIT(2) +#define ETH_INT_FC_JAM BIT(3) +#define ETH_INT_FC_OVR BIT(4) +#define ETH_INT_FC_UND BIT(5) +#define ETH_INT_FC_IOC 0x000000c0 + +/* Ethernet FIFO registers */ +#define ETH_FIFI_TT_TTH_BIT 0 +#define ETH_FIFO_TT_TTH 0x0000007f + +/* Ethernet ARC/multicast registers */ +#define ETH_ARC_PRO BIT(0) +#define ETH_ARC_AM BIT(1) +#define ETH_ARC_AFM BIT(2) +#define ETH_ARC_AB BIT(3) + +/* Ethernet SAL registers */ +#define ETH_SAL_BYTE_5 0x000000ff +#define ETH_SAL_BYTE_4 0x0000ff00 +#define ETH_SAL_BYTE_3 0x00ff0000 +#define ETH_SAL_BYTE_2 0xff000000 + +/* Ethernet SAH registers */ +#define ETH_SAH_BYTE1 0x000000ff +#define ETH_SAH_BYTE0 0x0000ff00 + +/* Ethernet GPF register */ +#define ETH_GPF_PTV 0x0000ffff + +/* Ethernet PFG register */ +#define ETH_PFS_PFD BIT(0) + +/* Ethernet CFSA[0-3] registers */ +#define ETH_CFSA0_CFSA4 0x000000ff +#define ETH_CFSA0_CFSA5 0x0000ff00 +#define ETH_CFSA1_CFSA2 0x000000ff +#define ETH_CFSA1_CFSA3 0x0000ff00 +#define ETH_CFSA1_CFSA0 0x000000ff +#define ETH_CFSA1_CFSA1 0x0000ff00 + +/* Ethernet MAC1 registers */ +#define ETH_MAC1_RE BIT(0) +#define ETH_MAC1_PAF BIT(1) +#define ETH_MAC1_RFC BIT(2) +#define ETH_MAC1_TFC BIT(3) +#define ETH_MAC1_LB BIT(4) +#define ETH_MAC1_MR BIT(31) + +/* Ethernet MAC2 registers */ +#define ETH_MAC2_FD BIT(0) +#define ETH_MAC2_FLC BIT(1) +#define ETH_MAC2_HFE BIT(2) +#define ETH_MAC2_DC BIT(3) +#define ETH_MAC2_CEN BIT(4) +#define ETH_MAC2_PE BIT(5) +#define ETH_MAC2_VPE BIT(6) +#define ETH_MAC2_APE BIT(7) +#define ETH_MAC2_PPE BIT(8) +#define ETH_MAC2_LPE BIT(9) +#define ETH_MAC2_NB BIT(12) +#define ETH_MAC2_BP BIT(13) +#define ETH_MAC2_ED BIT(14) + +/* Ethernet IPGT register */ +#define ETH_IPGT 0x0000007f + +/* Ethernet IPGR registers */ +#define ETH_IPGR_IPGR2 0x0000007f +#define ETH_IPGR_IPGR1 0x00007f00 + +/* Ethernet CLRT registers */ +#define ETH_CLRT_MAX_RET 0x0000000f +#define ETH_CLRT_COL_WIN 0x00003f00 + +/* Ethernet MAXF register */ +#define ETH_MAXF 0x0000ffff + +/* Ethernet test registers */ +#define ETH_TEST_REG BIT(2) +#define ETH_MCP_DIV 0x000000ff + +/* MII registers */ +#define ETH_MII_CFG_RSVD 0x0000000c +#define ETH_MII_CMD_RD BIT(0) +#define ETH_MII_CMD_SCN BIT(1) +#define ETH_MII_REG_ADDR 0x0000001f +#define ETH_MII_PHY_ADDR 0x00001f00 +#define ETH_MII_WTD_DATA 0x0000ffff +#define ETH_MII_RDD_DATA 0x0000ffff +#define ETH_MII_IND_BSY BIT(0) +#define ETH_MII_IND_SCN BIT(1) +#define ETH_MII_IND_NV BIT(2) + +/* Values for the DEVCS field of the Ethernet DMA Rx and Tx descriptors. */ +#define ETH_RX_FD BIT(0) +#define ETH_RX_LD BIT(1) +#define ETH_RX_ROK BIT(2) +#define ETH_RX_FM BIT(3) +#define ETH_RX_MP BIT(4) +#define ETH_RX_BP BIT(5) +#define ETH_RX_VLT BIT(6) +#define ETH_RX_CF BIT(7) +#define ETH_RX_OVR BIT(8) +#define ETH_RX_CRC BIT(9) +#define ETH_RX_CV BIT(10) +#define ETH_RX_DB BIT(11) +#define ETH_RX_LE BIT(12) +#define ETH_RX_LOR BIT(13) +#define ETH_RX_CES BIT(14) +#define ETH_RX_LEN_BIT 16 +#define ETH_RX_LEN 0xffff0000 + +#define ETH_TX_FD BIT(0) +#define ETH_TX_LD BIT(1) +#define ETH_TX_OEN BIT(2) +#define ETH_TX_PEN BIT(3) +#define ETH_TX_CEN BIT(4) +#define ETH_TX_HEN BIT(5) +#define ETH_TX_TOK BIT(6) +#define ETH_TX_MP BIT(7) +#define ETH_TX_BP BIT(8) +#define ETH_TX_UND BIT(9) +#define ETH_TX_OF BIT(10) +#define ETH_TX_ED BIT(11) +#define ETH_TX_EC BIT(12) +#define ETH_TX_LC BIT(13) +#define ETH_TX_TD BIT(14) +#define ETH_TX_CRC BIT(15) +#define ETH_TX_LE BIT(16) +#define ETH_TX_CC 0x001E0000 + +/* DMA descriptor (in physical memory). */ +struct dma_desc { + u32 control; /* Control. use DMAD_* */ + u32 ca; /* Current Address. */ + u32 devcs; /* Device control and status. */ + u32 link; /* Next descriptor in chain. */ +}; + +#define DMA_DESC_COUNT_BIT 0 +#define DMA_DESC_COUNT_MSK 0x0003ffff +#define DMA_DESC_DS_BIT 20 +#define DMA_DESC_DS_MSK 0x00300000 + +#define DMA_DESC_DEV_CMD_BIT 22 +#define DMA_DESC_DEV_CMD_MSK 0x01c00000 + +/* DMA descriptors interrupts */ +#define DMA_DESC_COF BIT(25) /* Chain on finished */ +#define DMA_DESC_COD BIT(26) /* Chain on done */ +#define DMA_DESC_IOF BIT(27) /* Interrupt on finished */ +#define DMA_DESC_IOD BIT(28) /* Interrupt on done */ +#define DMA_DESC_TERM BIT(29) /* Terminated */ +#define DMA_DESC_DONE BIT(30) /* Done */ +#define DMA_DESC_FINI BIT(31) /* Finished */ + +/* DMA register (within Internal Register Map). */ +struct dma_reg { + u32 dmac; /* Control. */ + u32 dmas; /* Status. */ + u32 dmasm; /* Mask. */ + u32 dmadptr; /* Descriptor pointer. */ + u32 dmandptr; /* Next descriptor pointer. */ +}; + +/* DMA channels specific registers */ +#define DMA_CHAN_RUN_BIT BIT(0) +#define DMA_CHAN_DONE_BIT BIT(1) +#define DMA_CHAN_MODE_BIT BIT(2) +#define DMA_CHAN_MODE_MSK 0x0000000c +#define DMA_CHAN_MODE_AUTO 0 +#define DMA_CHAN_MODE_BURST 1 +#define DMA_CHAN_MODE_XFRT 2 +#define DMA_CHAN_MODE_RSVD 3 +#define DMA_CHAN_ACT_BIT BIT(4) + +/* DMA status registers */ +#define DMA_STAT_FINI BIT(0) +#define DMA_STAT_DONE BIT(1) +#define DMA_STAT_CHAIN BIT(2) +#define DMA_STAT_ERR BIT(3) +#define DMA_STAT_HALT BIT(4) + #define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \ ((dev)->dev_addr[1])) #define STATION_ADDRESS_LOW(dev) (((dev)->dev_addr[2] << 24) | \ @@ -95,24 +323,30 @@ enum chain_status { desc_filled, - desc_empty + desc_is_empty }; +#define DMA_COUNT(count) ((count) & DMA_DESC_COUNT_MSK) #define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0) #define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0) #define RCVPKT_LENGTH(X) (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT) /* Information that need to be kept for each board. */ struct korina_private { - struct eth_regs *eth_regs; - struct dma_reg *rx_dma_regs; - struct dma_reg *tx_dma_regs; + struct eth_regs __iomem *eth_regs; + struct dma_reg __iomem *rx_dma_regs; + struct dma_reg __iomem *tx_dma_regs; struct dma_desc *td_ring; /* transmit descriptor ring */ struct dma_desc *rd_ring; /* receive descriptor ring */ + dma_addr_t td_dma; + dma_addr_t rd_dma; struct sk_buff *tx_skb[KORINA_NUM_TDS]; struct sk_buff *rx_skb[KORINA_NUM_RDS]; + dma_addr_t rx_skb_dma[KORINA_NUM_RDS]; + dma_addr_t tx_skb_dma[KORINA_NUM_TDS]; + int rx_next_done; int rx_chain_head; int rx_chain_tail; @@ -137,15 +371,18 @@ struct korina_private { struct mii_if_info mii_if; struct work_struct restart_task; struct net_device *dev; - int phy_addr; + struct device *dmadev; + int mii_clock_freq; }; -extern unsigned int idt_cpu_freq; +static dma_addr_t korina_tx_dma(struct korina_private *lp, int idx) +{ + return lp->td_dma + (idx * sizeof(struct dma_desc)); +} -static inline void korina_start_dma(struct dma_reg *ch, u32 dma_addr) +static dma_addr_t korina_rx_dma(struct korina_private *lp, int idx) { - writel(0, &ch->dmandptr); - writel(dma_addr, &ch->dmadptr); + return lp->rd_dma + (idx * sizeof(struct dma_desc)); } static inline void korina_abort_dma(struct net_device *dev, @@ -164,11 +401,6 @@ static inline void korina_abort_dma(struct net_device *dev, writel(0, &ch->dmandptr); } -static inline void korina_chain_dma(struct dma_reg *ch, u32 dma_addr) -{ - writel(dma_addr, &ch->dmandptr); -} - static void korina_abort_tx(struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); @@ -183,30 +415,21 @@ static void korina_abort_rx(struct net_device *dev) korina_abort_dma(dev, lp->rx_dma_regs); } -static void korina_start_rx(struct korina_private *lp, - struct dma_desc *rd) -{ - korina_start_dma(lp->rx_dma_regs, CPHYSADDR(rd)); -} - -static void korina_chain_rx(struct korina_private *lp, - struct dma_desc *rd) -{ - korina_chain_dma(lp->rx_dma_regs, CPHYSADDR(rd)); -} - /* transmit packet */ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); - unsigned long flags; - u32 length; u32 chain_prev, chain_next; + unsigned long flags; struct dma_desc *td; + dma_addr_t ca; + u32 length; + int idx; spin_lock_irqsave(&lp->lock, flags); - td = &lp->td_ring[lp->tx_chain_tail]; + idx = lp->tx_chain_tail; + td = &lp->td_ring[idx]; /* stop queue when full, drop pkts if queue already full */ if (lp->tx_count >= (KORINA_NUM_TDS - 2)) { @@ -214,38 +437,37 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) if (lp->tx_count == (KORINA_NUM_TDS - 2)) netif_stop_queue(dev); - else { - dev->stats.tx_dropped++; - dev_kfree_skb_any(skb); - spin_unlock_irqrestore(&lp->lock, flags); - - return NETDEV_TX_OK; - } + else + goto drop_packet; } lp->tx_count++; - lp->tx_skb[lp->tx_chain_tail] = skb; + lp->tx_skb[idx] = skb; length = skb->len; - dma_cache_wback((u32)skb->data, skb->len); /* Setup the transmit descriptor. */ - dma_cache_inv((u32) td, sizeof(*td)); - td->ca = CPHYSADDR(skb->data); - chain_prev = (lp->tx_chain_tail - 1) & KORINA_TDS_MASK; - chain_next = (lp->tx_chain_tail + 1) & KORINA_TDS_MASK; + ca = dma_map_single(lp->dmadev, skb->data, length, DMA_TO_DEVICE); + if (dma_mapping_error(lp->dmadev, ca)) + goto drop_packet; + + lp->tx_skb_dma[idx] = ca; + td->ca = ca; + + chain_prev = (idx - 1) & KORINA_TDS_MASK; + chain_next = (idx + 1) & KORINA_TDS_MASK; if (readl(&(lp->tx_dma_regs->dmandptr)) == 0) { - if (lp->tx_chain_status == desc_empty) { + if (lp->tx_chain_status == desc_is_empty) { /* Update tail */ td->control = DMA_COUNT(length) | DMA_DESC_COF | DMA_DESC_IOF; /* Move tail */ lp->tx_chain_tail = chain_next; /* Write to NDPTR */ - writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]), - &lp->tx_dma_regs->dmandptr); + writel(korina_tx_dma(lp, lp->tx_chain_head), + &lp->tx_dma_regs->dmandptr); /* Move head to tail */ lp->tx_chain_head = lp->tx_chain_tail; } else { @@ -256,18 +478,18 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) lp->td_ring[chain_prev].control &= ~DMA_DESC_COF; /* Link to prev */ - lp->td_ring[chain_prev].link = CPHYSADDR(td); + lp->td_ring[chain_prev].link = korina_tx_dma(lp, idx); /* Move tail */ lp->tx_chain_tail = chain_next; /* Write to NDPTR */ - writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]), - &(lp->tx_dma_regs->dmandptr)); + writel(korina_tx_dma(lp, lp->tx_chain_head), + &lp->tx_dma_regs->dmandptr); /* Move head to tail */ lp->tx_chain_head = lp->tx_chain_tail; - lp->tx_chain_status = desc_empty; + lp->tx_chain_status = desc_is_empty; } } else { - if (lp->tx_chain_status == desc_empty) { + if (lp->tx_chain_status == desc_is_empty) { /* Update tail */ td->control = DMA_COUNT(length) | DMA_DESC_COF | DMA_DESC_IOF; @@ -280,44 +502,66 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) DMA_DESC_COF | DMA_DESC_IOF; lp->td_ring[chain_prev].control &= ~DMA_DESC_COF; - lp->td_ring[chain_prev].link = CPHYSADDR(td); + lp->td_ring[chain_prev].link = korina_tx_dma(lp, idx); lp->tx_chain_tail = chain_next; } } - dma_cache_wback((u32) td, sizeof(*td)); netif_trans_update(dev); spin_unlock_irqrestore(&lp->lock, flags); return NETDEV_TX_OK; + +drop_packet: + dev->stats.tx_dropped++; + dev_kfree_skb_any(skb); + spin_unlock_irqrestore(&lp->lock, flags); + + return NETDEV_TX_OK; } -static int mdio_read(struct net_device *dev, int mii_id, int reg) +static int korina_mdio_wait(struct korina_private *lp) +{ + u32 value; + + return readl_poll_timeout_atomic(&lp->eth_regs->miimind, + value, value & ETH_MII_IND_BSY, + 1, 1000); +} + +static int korina_mdio_read(struct net_device *dev, int phy, int reg) { struct korina_private *lp = netdev_priv(dev); int ret; - mii_id = ((lp->rx_irq == 0x2c ? 1 : 0) << 8); + ret = korina_mdio_wait(lp); + if (ret < 0) + return ret; - writel(0, &lp->eth_regs->miimcfg); - writel(0, &lp->eth_regs->miimcmd); - writel(mii_id | reg, &lp->eth_regs->miimaddr); - writel(ETH_MII_CMD_SCN, &lp->eth_regs->miimcmd); + writel(phy << 8 | reg, &lp->eth_regs->miimaddr); + writel(1, &lp->eth_regs->miimcmd); + + ret = korina_mdio_wait(lp); + if (ret < 0) + return ret; - ret = (int)(readl(&lp->eth_regs->miimrdd)); + if (readl(&lp->eth_regs->miimind) & ETH_MII_IND_NV) + return -EINVAL; + + ret = readl(&lp->eth_regs->miimrdd); + writel(0, &lp->eth_regs->miimcmd); return ret; } -static void mdio_write(struct net_device *dev, int mii_id, int reg, int val) +static void korina_mdio_write(struct net_device *dev, int phy, int reg, int val) { struct korina_private *lp = netdev_priv(dev); - mii_id = ((lp->rx_irq == 0x2c ? 1 : 0) << 8); + if (korina_mdio_wait(lp)) + return; - writel(0, &lp->eth_regs->miimcfg); - writel(1, &lp->eth_regs->miimcmd); - writel(mii_id | reg, &lp->eth_regs->miimaddr); - writel(ETH_MII_CMD_SCN, &lp->eth_regs->miimcmd); + writel(0, &lp->eth_regs->miimcmd); + writel(phy << 8 | reg, &lp->eth_regs->miimaddr); writel(val, &lp->eth_regs->miimwtd); } @@ -353,12 +597,10 @@ static int korina_rx(struct net_device *dev, int limit) struct korina_private *lp = netdev_priv(dev); struct dma_desc *rd = &lp->rd_ring[lp->rx_next_done]; struct sk_buff *skb, *skb_new; - u8 *pkt_buf; u32 devcs, pkt_len, dmas; + dma_addr_t ca; int count; - dma_cache_inv((u32)rd, sizeof(*rd)); - for (count = 0; count < limit; count++) { skb = lp->rx_skb[lp->rx_next_done]; skb_new = NULL; @@ -392,20 +634,22 @@ static int korina_rx(struct net_device *dev, int limit) goto next; } - pkt_len = RCVPKT_LENGTH(devcs); - - /* must be the (first and) last - * descriptor then */ - pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; - - /* invalidate the cache */ - dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); - /* Malloc up new buffer. */ skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); - if (!skb_new) break; + + ca = dma_map_single(lp->dmadev, skb_new->data, KORINA_RBSIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(lp->dmadev, ca)) { + dev_kfree_skb_any(skb_new); + break; + } + + pkt_len = RCVPKT_LENGTH(devcs); + dma_unmap_single(lp->dmadev, lp->rx_skb_dma[lp->rx_next_done], + pkt_len, DMA_FROM_DEVICE); + /* Do not count the CRC */ skb_put(skb, pkt_len - 4); skb->protocol = eth_type_trans(skb, dev); @@ -420,15 +664,13 @@ static int korina_rx(struct net_device *dev, int limit) dev->stats.multicast++; lp->rx_skb[lp->rx_next_done] = skb_new; + lp->rx_skb_dma[lp->rx_next_done] = ca; next: rd->devcs = 0; /* Restore descriptor's curr_addr */ - if (skb_new) - rd->ca = CPHYSADDR(skb_new->data); - else - rd->ca = CPHYSADDR(skb->data); + rd->ca = lp->rx_skb_dma[lp->rx_next_done]; rd->control = DMA_COUNT(KORINA_RBSIZE) | DMA_DESC_COD | DMA_DESC_IOD; @@ -437,23 +679,21 @@ next: ~DMA_DESC_COD; lp->rx_next_done = (lp->rx_next_done + 1) & KORINA_RDS_MASK; - dma_cache_wback((u32)rd, sizeof(*rd)); rd = &lp->rd_ring[lp->rx_next_done]; - writel(~DMA_STAT_DONE, &lp->rx_dma_regs->dmas); + writel((u32)~DMA_STAT_DONE, &lp->rx_dma_regs->dmas); } dmas = readl(&lp->rx_dma_regs->dmas); if (dmas & DMA_STAT_HALT) { - writel(~(DMA_STAT_HALT | DMA_STAT_ERR), - &lp->rx_dma_regs->dmas); + writel((u32)~(DMA_STAT_HALT | DMA_STAT_ERR), + &lp->rx_dma_regs->dmas); lp->dma_halt_cnt++; rd->devcs = 0; - skb = lp->rx_skb[lp->rx_next_done]; - rd->ca = CPHYSADDR(skb->data); - dma_cache_wback((u32)rd, sizeof(*rd)); - korina_chain_rx(lp, rd); + rd->ca = lp->rx_skb_dma[lp->rx_next_done]; + writel(korina_rx_dma(lp, rd - lp->rd_ring), + &lp->rx_dma_regs->dmandptr); } return count; @@ -576,6 +816,10 @@ static void korina_tx(struct net_device *dev) /* We must always free the original skb */ if (lp->tx_skb[lp->tx_next_done]) { + dma_unmap_single(lp->dmadev, + lp->tx_skb_dma[lp->tx_next_done], + lp->tx_skb[lp->tx_next_done]->len, + DMA_TO_DEVICE); dev_kfree_skb_any(lp->tx_skb[lp->tx_next_done]); lp->tx_skb[lp->tx_next_done] = NULL; } @@ -622,9 +866,9 @@ korina_tx_dma_interrupt(int irq, void *dev_id) if (lp->tx_chain_status == desc_filled && (readl(&(lp->tx_dma_regs->dmandptr)) == 0)) { - writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]), - &(lp->tx_dma_regs->dmandptr)); - lp->tx_chain_status = desc_empty; + writel(korina_tx_dma(lp, lp->tx_chain_head), + &lp->tx_dma_regs->dmandptr); + lp->tx_chain_status = desc_is_empty; lp->tx_chain_head = lp->tx_chain_tail; netif_trans_update(dev); } @@ -643,7 +887,7 @@ static void korina_check_media(struct net_device *dev, unsigned int init_media) { struct korina_private *lp = netdev_priv(dev); - mii_check_media(&lp->mii_if, 0, init_media); + mii_check_media(&lp->mii_if, 1, init_media); if (lp->mii_if.full_duplex) writel(readl(&lp->eth_regs->ethmac2) | ETH_MAC2_FD, @@ -743,6 +987,7 @@ static int korina_alloc_ring(struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); struct sk_buff *skb; + dma_addr_t ca; int i; /* Initialize the transmit descriptors */ @@ -754,7 +999,7 @@ static int korina_alloc_ring(struct net_device *dev) } lp->tx_next_done = lp->tx_chain_head = lp->tx_chain_tail = lp->tx_full = lp->tx_count = 0; - lp->tx_chain_status = desc_empty; + lp->tx_chain_status = desc_is_empty; /* Initialize the receive descriptors */ for (i = 0; i < KORINA_NUM_RDS; i++) { @@ -765,19 +1010,24 @@ static int korina_alloc_ring(struct net_device *dev) lp->rd_ring[i].control = DMA_DESC_IOD | DMA_COUNT(KORINA_RBSIZE); lp->rd_ring[i].devcs = 0; - lp->rd_ring[i].ca = CPHYSADDR(skb->data); - lp->rd_ring[i].link = CPHYSADDR(&lp->rd_ring[i+1]); + ca = dma_map_single(lp->dmadev, skb->data, KORINA_RBSIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(lp->dmadev, ca)) + return -ENOMEM; + lp->rd_ring[i].ca = ca; + lp->rx_skb_dma[i] = ca; + lp->rd_ring[i].link = korina_rx_dma(lp, i + 1); } /* loop back receive descriptors, so the last * descriptor points to the first one */ - lp->rd_ring[i - 1].link = CPHYSADDR(&lp->rd_ring[0]); + lp->rd_ring[i - 1].link = lp->rd_dma; lp->rd_ring[i - 1].control |= DMA_DESC_COD; lp->rx_next_done = 0; lp->rx_chain_head = 0; lp->rx_chain_tail = 0; - lp->rx_chain_status = desc_empty; + lp->rx_chain_status = desc_is_empty; return 0; } @@ -789,16 +1039,22 @@ static void korina_free_ring(struct net_device *dev) for (i = 0; i < KORINA_NUM_RDS; i++) { lp->rd_ring[i].control = 0; - if (lp->rx_skb[i]) + if (lp->rx_skb[i]) { + dma_unmap_single(lp->dmadev, lp->rx_skb_dma[i], + KORINA_RBSIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(lp->rx_skb[i]); - lp->rx_skb[i] = NULL; + lp->rx_skb[i] = NULL; + } } for (i = 0; i < KORINA_NUM_TDS; i++) { lp->td_ring[i].control = 0; - if (lp->tx_skb[i]) + if (lp->tx_skb[i]) { + dma_unmap_single(lp->dmadev, lp->tx_skb_dma[i], + lp->tx_skb[i]->len, DMA_TO_DEVICE); dev_kfree_skb_any(lp->tx_skb[i]); - lp->tx_skb[i] = NULL; + lp->tx_skb[i] = NULL; + } } } @@ -830,7 +1086,8 @@ static int korina_init(struct net_device *dev) writel(0, &lp->rx_dma_regs->dmas); /* Start Rx DMA */ - korina_start_rx(lp, &lp->rd_ring[0]); + writel(0, &lp->rx_dma_regs->dmandptr); + writel(korina_rx_dma(lp, 0), &lp->rx_dma_regs->dmadptr); writel(readl(&lp->tx_dma_regs->dmasm) & ~(DMA_STAT_FINI | DMA_STAT_ERR), @@ -867,14 +1124,17 @@ static int korina_init(struct net_device *dev) /* Management Clock Prescaler Divisor * Clock independent setting */ - writel(((idt_cpu_freq) / MII_CLOCK + 1) & ~1, - &lp->eth_regs->ethmcp); + writel(((lp->mii_clock_freq) / MII_CLOCK + 1) & ~1, + &lp->eth_regs->ethmcp); + writel(0, &lp->eth_regs->miimcfg); /* don't transmit until fifo contains 48b */ writel(48, &lp->eth_regs->ethfifott); writel(ETH_MAC1_RE, &lp->eth_regs->ethmac1); + korina_check_media(dev, 1); + napi_enable(&lp->napi); netif_start_queue(dev); @@ -1022,86 +1282,94 @@ static const struct net_device_ops korina_netdev_ops = { static int korina_probe(struct platform_device *pdev) { - struct korina_device *bif = platform_get_drvdata(pdev); + u8 *mac_addr = dev_get_platdata(&pdev->dev); struct korina_private *lp; struct net_device *dev; - struct resource *r; + struct clk *clk; + void __iomem *p; int rc; - dev = alloc_etherdev(sizeof(struct korina_private)); + dev = devm_alloc_etherdev(&pdev->dev, sizeof(struct korina_private)); if (!dev) return -ENOMEM; SET_NETDEV_DEV(dev, &pdev->dev); lp = netdev_priv(dev); - bif->dev = dev; - memcpy(dev->dev_addr, bif->mac, ETH_ALEN); + if (mac_addr) + ether_addr_copy(dev->dev_addr, mac_addr); + else if (of_get_mac_address(pdev->dev.of_node, dev->dev_addr) < 0) + eth_hw_addr_random(dev); + + clk = devm_clk_get_optional(&pdev->dev, "mdioclk"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + if (clk) { + clk_prepare_enable(clk); + lp->mii_clock_freq = clk_get_rate(clk); + } else { + lp->mii_clock_freq = 200000000; /* max possible input clk */ + } - lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx"); - lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx"); + lp->rx_irq = platform_get_irq_byname(pdev, "rx"); + lp->tx_irq = platform_get_irq_byname(pdev, "tx"); - r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_regs"); - dev->base_addr = r->start; - lp->eth_regs = ioremap(r->start, resource_size(r)); - if (!lp->eth_regs) { + p = devm_platform_ioremap_resource_byname(pdev, "emac"); + if (!p) { printk(KERN_ERR DRV_NAME ": cannot remap registers\n"); - rc = -ENXIO; - goto probe_err_out; + return -ENOMEM; } + lp->eth_regs = p; - r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_dma_rx"); - lp->rx_dma_regs = ioremap(r->start, resource_size(r)); - if (!lp->rx_dma_regs) { + p = devm_platform_ioremap_resource_byname(pdev, "dma_rx"); + if (!p) { printk(KERN_ERR DRV_NAME ": cannot remap Rx DMA registers\n"); - rc = -ENXIO; - goto probe_err_dma_rx; + return -ENOMEM; } + lp->rx_dma_regs = p; - r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_dma_tx"); - lp->tx_dma_regs = ioremap(r->start, resource_size(r)); - if (!lp->tx_dma_regs) { + p = devm_platform_ioremap_resource_byname(pdev, "dma_tx"); + if (!p) { printk(KERN_ERR DRV_NAME ": cannot remap Tx DMA registers\n"); - rc = -ENXIO; - goto probe_err_dma_tx; - } - - lp->td_ring = kmalloc(TD_RING_SIZE + RD_RING_SIZE, GFP_KERNEL); - if (!lp->td_ring) { - rc = -ENXIO; - goto probe_err_td_ring; + return -ENOMEM; } + lp->tx_dma_regs = p; - dma_cache_inv((unsigned long)(lp->td_ring), - TD_RING_SIZE + RD_RING_SIZE); + lp->td_ring = dmam_alloc_coherent(&pdev->dev, TD_RING_SIZE, + &lp->td_dma, GFP_KERNEL); + if (!lp->td_ring) + return -ENOMEM; - /* now convert TD_RING pointer to KSEG1 */ - lp->td_ring = (struct dma_desc *)KSEG1ADDR(lp->td_ring); - lp->rd_ring = &lp->td_ring[KORINA_NUM_TDS]; + lp->rd_ring = dmam_alloc_coherent(&pdev->dev, RD_RING_SIZE, + &lp->rd_dma, GFP_KERNEL); + if (!lp->rd_ring) + return -ENOMEM; spin_lock_init(&lp->lock); /* just use the rx dma irq */ dev->irq = lp->rx_irq; lp->dev = dev; + lp->dmadev = &pdev->dev; dev->netdev_ops = &korina_netdev_ops; dev->ethtool_ops = &netdev_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; netif_napi_add(dev, &lp->napi, korina_poll, NAPI_POLL_WEIGHT); - lp->phy_addr = (((lp->rx_irq == 0x2c? 1:0) << 8) | 0x05); lp->mii_if.dev = dev; - lp->mii_if.mdio_read = mdio_read; - lp->mii_if.mdio_write = mdio_write; - lp->mii_if.phy_id = lp->phy_addr; + lp->mii_if.mdio_read = korina_mdio_read; + lp->mii_if.mdio_write = korina_mdio_write; + lp->mii_if.phy_id = 1; lp->mii_if.phy_id_mask = 0x1f; lp->mii_if.reg_num_mask = 0x1f; + platform_set_drvdata(pdev, dev); + rc = register_netdev(dev); if (rc < 0) { printk(KERN_ERR DRV_NAME ": cannot register net device: %d\n", rc); - goto probe_err_register; + return rc; } timer_setup(&lp->media_check_timer, korina_poll_media, 0); @@ -1109,40 +1377,33 @@ static int korina_probe(struct platform_device *pdev) printk(KERN_INFO "%s: " DRV_NAME "-" DRV_VERSION " " DRV_RELDATE "\n", dev->name); -out: return rc; - -probe_err_register: - kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring)); -probe_err_td_ring: - iounmap(lp->tx_dma_regs); -probe_err_dma_tx: - iounmap(lp->rx_dma_regs); -probe_err_dma_rx: - iounmap(lp->eth_regs); -probe_err_out: - free_netdev(dev); - goto out; } static int korina_remove(struct platform_device *pdev) { - struct korina_device *bif = platform_get_drvdata(pdev); - struct korina_private *lp = netdev_priv(bif->dev); - - iounmap(lp->eth_regs); - iounmap(lp->rx_dma_regs); - iounmap(lp->tx_dma_regs); - kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring)); + struct net_device *dev = platform_get_drvdata(pdev); - unregister_netdev(bif->dev); - free_netdev(bif->dev); + unregister_netdev(dev); return 0; } +#ifdef CONFIG_OF +static const struct of_device_id korina_match[] = { + { + .compatible = "idt,3243x-emac", + }, + { } +}; +MODULE_DEVICE_TABLE(of, korina_match); +#endif + static struct platform_driver korina_driver = { - .driver.name = "korina", + .driver = { + .name = "korina", + .of_match_table = of_match_ptr(korina_match), + }, .probe = korina_probe, .remove = korina_remove, }; diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 0f8ef8f1232c..41c2ad210bc9 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -435,7 +435,6 @@ static int xrx200_probe(struct platform_device *pdev) struct resource *res; struct xrx200_priv *priv; struct net_device *net_dev; - const u8 *mac; int err; /* alloc the network device */ @@ -477,10 +476,8 @@ static int xrx200_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } - mac = of_get_mac_address(np); - if (!IS_ERR(mac)) - ether_addr_copy(net_dev->dev_addr, mac); - else + err = of_get_mac_address(np, net_dev->dev_addr); + if (err) eth_hw_addr_random(net_dev); /* bring up the dma engine and IP core */ diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index ca1681aa951a..d207bfcaf31d 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2702,7 +2702,6 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, struct platform_device *ppdev; struct mv643xx_eth_platform_data ppd; struct resource res; - const char *mac_addr; int ret; int dev_num = 0; @@ -2733,9 +2732,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, return -EINVAL; } - mac_addr = of_get_mac_address(pnp); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ppd.mac_addr, mac_addr); + of_get_mac_address(pnp, ppd.mac_addr); mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size); mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f20dfd1d7a6b..7d5cd9bc6c99 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5141,7 +5141,6 @@ static int mvneta_probe(struct platform_device *pdev) struct net_device *dev; struct phylink *phylink; struct phy *comphy; - const char *dt_mac_addr; char hw_mac_addr[ETH_ALEN]; phy_interface_t phy_mode; const char *mac_from; @@ -5237,10 +5236,9 @@ static int mvneta_probe(struct platform_device *pdev) goto err_free_ports; } - dt_mac_addr = of_get_mac_address(dn); - if (!IS_ERR(dt_mac_addr)) { + err = of_get_mac_address(dn, dev->dev_addr); + if (!err) { mac_from = "device tree"; - ether_addr_copy(dev->dev_addr, dt_mac_addr); } else { mvneta_get_mac_addr(pp, hw_mac_addr); if (is_valid_ether_addr(hw_mac_addr)) { diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 4812cdb4609e..7cc7d72d761e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -918,9 +918,8 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto, mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); - /* Set L4 offset */ - mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, - sizeof(struct iphdr) - 4, + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, -4, MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); mvpp2_prs_sram_ri_update(&pe, ri, ri_mask | MVPP2_PRS_RI_IP_FRAG_MASK); @@ -1335,7 +1334,7 @@ static void mvpp2_prs_vid_init(struct mvpp2 *priv) static int mvpp2_prs_etype_init(struct mvpp2 *priv) { struct mvpp2_prs_entry pe; - int tid; + int tid, ihl; /* Ethertype: PPPoE */ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, @@ -1427,67 +1426,43 @@ static int mvpp2_prs_etype_init(struct mvpp2 *priv) MVPP2_PRS_RI_UDF3_MASK); mvpp2_prs_hw_write(priv, &pe); - /* Ethertype: IPv4 without options */ - tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, - MVPP2_PE_LAST_FREE_TID); - if (tid < 0) - return tid; - - memset(&pe, 0, sizeof(pe)); - mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2); - pe.index = tid; - - mvpp2_prs_match_etype(&pe, 0, ETH_P_IP); - mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_IPV4_HEAD | MVPP2_PRS_IPV4_IHL, - MVPP2_PRS_IPV4_HEAD_MASK | - MVPP2_PRS_IPV4_IHL_MASK); - - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); - mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, - MVPP2_PRS_RI_L3_PROTO_MASK); - /* goto ipv4 dest-address (skip eth_type + IP-header-size - 4) */ - mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + - sizeof(struct iphdr) - 4, - MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); - /* Set L3 offset */ - mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, - MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); - - /* Update shadow table and hw entry */ - mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); - priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; - priv->prs_shadow[pe.index].finish = false; - mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4, - MVPP2_PRS_RI_L3_PROTO_MASK); - mvpp2_prs_hw_write(priv, &pe); - - /* Ethertype: IPv4 with options */ - tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, - MVPP2_PE_LAST_FREE_TID); - if (tid < 0) - return tid; - - pe.index = tid; + /* Ethertype: IPv4 with header length >= 5 */ + for (ihl = MVPP2_PRS_IPV4_IHL_MIN; ihl <= MVPP2_PRS_IPV4_IHL_MAX; ihl++) { + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; - mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_IPV4_HEAD, - MVPP2_PRS_IPV4_HEAD_MASK); + memset(&pe, 0, sizeof(pe)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2); + pe.index = tid; - /* Clear ri before updating */ - pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0; - pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0; - mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT, - MVPP2_PRS_RI_L3_PROTO_MASK); + mvpp2_prs_match_etype(&pe, 0, ETH_P_IP); + mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_IPV4_HEAD | ihl, + MVPP2_PRS_IPV4_HEAD_MASK | + MVPP2_PRS_IPV4_IHL_MASK); + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, + MVPP2_PRS_RI_L3_PROTO_MASK); + /* goto ipv4 dst-address (skip eth_type + IP-header-size - 4) */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + + sizeof(struct iphdr) - 4, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set L4 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, + MVPP2_ETH_TYPE_LEN + (ihl * 4), + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); - /* Update shadow table and hw entry */ - mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); - priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; - priv->prs_shadow[pe.index].finish = false; - mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4_OPT, - MVPP2_PRS_RI_L3_PROTO_MASK); - mvpp2_prs_hw_write(priv, &pe); + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); + priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; + priv->prs_shadow[pe.index].finish = false; + mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4, + MVPP2_PRS_RI_L3_PROTO_MASK); + mvpp2_prs_hw_write(priv, &pe); + } /* Ethertype: IPv6 without options */ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, @@ -1674,7 +1649,8 @@ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv) pe.index = tid; mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_IPV4_HEAD | MVPP2_PRS_IPV4_IHL, + MVPP2_PRS_IPV4_HEAD | + MVPP2_PRS_IPV4_IHL_MIN, MVPP2_PRS_IPV4_HEAD_MASK | MVPP2_PRS_IPV4_IHL_MASK); @@ -1788,9 +1764,8 @@ static int mvpp2_prs_ip4_init(struct mvpp2 *priv) mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); - /* Set L4 offset */ - mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, - sizeof(struct iphdr) - 4, + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, -4, MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L4_OTHER, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h index c16e5b9947bd..5ce5907be591 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h @@ -28,7 +28,8 @@ #define MVPP2_PRS_IPV4_MC 0xe0 #define MVPP2_PRS_IPV4_MC_MASK 0xf0 #define MVPP2_PRS_IPV4_BC_MASK 0xff -#define MVPP2_PRS_IPV4_IHL 0x5 +#define MVPP2_PRS_IPV4_IHL_MIN 0x5 +#define MVPP2_PRS_IPV4_IHL_MAX 0xf #define MVPP2_PRS_IPV4_IHL_MASK 0xf #define MVPP2_PRS_IPV6_MC 0xff #define MVPP2_PRS_IPV6_MC_MASK 0xff diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 25dd903a3e92..f08c420a5803 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -456,20 +456,17 @@ static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw) { struct device_node *base_mac_np; struct device_node *np; - const char *base_mac; + int ret; np = of_find_compatible_node(NULL, NULL, "marvell,prestera"); base_mac_np = of_parse_phandle(np, "base-mac-provider", 0); - base_mac = of_get_mac_address(base_mac_np); - of_node_put(base_mac_np); - if (!IS_ERR(base_mac)) - ether_addr_copy(sw->base_mac, base_mac); - - if (!is_valid_ether_addr(sw->base_mac)) { + ret = of_get_mac_address(base_mac_np, sw->base_mac); + if (ret) { eth_random_addr(sw->base_mac); dev_info(prestera_dev(sw), "using random base mac address\n"); } + of_node_put(base_mac_np); return prestera_hw_switch_mac_set(sw, sw->base_mac); } diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c index be5677623455..298110119272 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c @@ -756,6 +756,7 @@ static void prestera_pci_remove(struct pci_dev *pdev) static const struct pci_device_id prestera_pci_devices[] = { { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC804) }, + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC80C) }, { } }; MODULE_DEVICE_TABLE(pci, prestera_pci_devices); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 49e052273f30..cb564890a3dc 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -798,7 +798,7 @@ static void prestera_fdb_event_work(struct work_struct *work) switch (swdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &swdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = prestera_port_fdb_set(port, fdb_info, true); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 3712e1786091..e967867828d8 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1392,7 +1392,6 @@ static int pxa168_eth_probe(struct platform_device *pdev) struct resource *res; struct clk *clk; struct device_node *np; - const unsigned char *mac_addr = NULL; int err; printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n"); @@ -1435,12 +1434,8 @@ static int pxa168_eth_probe(struct platform_device *pdev) INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task); - if (pdev->dev.of_node) - mac_addr = of_get_mac_address(pdev->dev.of_node); - - if (!IS_ERR_OR_NULL(mac_addr)) { - ether_addr_copy(dev->dev_addr, mac_addr); - } else { + err = of_get_mac_address(pdev->dev.of_node, dev->dev_addr); + if (err) { /* try reading the mac address, if set by the bootloader */ pxa168_eth_get_mac_address(dev, dev->dev_addr); if (!is_valid_ether_addr(dev->dev_addr)) { diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 68c154d715d6..222c32367b2c 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4728,7 +4728,7 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, { struct sky2_port *sky2; struct net_device *dev = alloc_etherdev(sizeof(*sky2)); - const void *iap; + int ret; if (!dev) return NULL; @@ -4798,10 +4798,8 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, * 1) from device tree data * 2) from internal registers set by bootloader */ - iap = of_get_mac_address(hw->pdev->dev.of_node); - if (!IS_ERR(iap)) - ether_addr_copy(dev->dev_addr, iap); - else + ret = of_get_mac_address(hw->pdev->dev.of_node, dev->dev_addr); + if (ret) memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8, ETH_ALEN); diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig index 3362b148de23..08c2e446d3d5 100644 --- a/drivers/net/ethernet/mediatek/Kconfig +++ b/drivers/net/ethernet/mediatek/Kconfig @@ -9,6 +9,7 @@ if NET_VENDOR_MEDIATEK config NET_MEDIATEK_SOC tristate "MediaTek SoC Gigabit Ethernet support" + depends on NET_DSA || !NET_DSA select PHYLINK help This driver supports the gigabit ethernet MACs in the diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 810def064f11..6b00c12c6c43 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2484,14 +2484,11 @@ static int __init mtk_init(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; - const char *mac_addr; - - mac_addr = of_get_mac_address(mac->of_node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(dev->dev_addr, mac_addr); + int ret; - /* If the mac address is invalid, use random mac address */ - if (!is_valid_ether_addr(dev->dev_addr)) { + ret = of_get_mac_address(mac->of_node, dev->dev_addr); + if (ret) { + /* If the mac address is invalid, use random mac address */ eth_hw_addr_random(dev); dev_err(eth->dev, "generated random MAC address %pM\n", dev->dev_addr); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 71e1ccea6e72..3ad10c793308 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -2,9 +2,8 @@ /* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ #include <linux/kernel.h> -#include <linux/jiffies.h> -#include <linux/delay.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/etherdevice.h> #include <linux/platform_device.h> #include "mtk_ppe.h" @@ -44,18 +43,17 @@ static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val) static int mtk_ppe_wait_busy(struct mtk_ppe *ppe) { - unsigned long timeout = jiffies + HZ; - - while (time_is_before_jiffies(timeout)) { - if (!(ppe_r32(ppe, MTK_PPE_GLO_CFG) & MTK_PPE_GLO_CFG_BUSY)) - return 0; + int ret; + u32 val; - usleep_range(10, 20); - } + ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val, + !(val & MTK_PPE_GLO_CFG_BUSY), + 20, MTK_PPE_WAIT_TIMEOUT_US); - dev_err(ppe->dev, "PPE table busy"); + if (ret) + dev_err(ppe->dev, "PPE table busy"); - return -ETIMEDOUT; + return ret; } static void mtk_ppe_cache_clear(struct mtk_ppe *ppe) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index 51bd5e75bbbd..242fb8f2ae65 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -12,6 +12,7 @@ #define MTK_PPE_ENTRIES_SHIFT 3 #define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT) #define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1) +#define MTK_PPE_WAIT_TIMEOUT_US 1000000 #define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0) #define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index 4975106fbc42..b5f68f66d42a 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -43,7 +43,7 @@ struct mtk_flow_entry { static const struct rhashtable_params mtk_flow_ht_params = { .head_offset = offsetof(struct mtk_flow_entry, node), - .head_offset = offsetof(struct mtk_flow_entry, cookie), + .key_offset = offsetof(struct mtk_flow_entry, cookie), .key_len = sizeof(unsigned long), .automatic_shrinking = true, }; @@ -232,6 +232,8 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) data.vlan.proto = act->vlan.proto; data.vlan.num++; break; + case FLOW_ACTION_VLAN_POP: + break; case FLOW_ACTION_PPPOE_PUSH: if (data.pppoe.num == 1) return -EOPNOTSUPP; @@ -391,6 +393,8 @@ mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f) return 0; } +static DEFINE_MUTEX(mtk_flow_offload_mutex); + static int mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { @@ -398,6 +402,7 @@ mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri struct net_device *dev = cb_priv; struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; + int err; if (!tc_can_offload(dev)) return -EOPNOTSUPP; @@ -405,18 +410,24 @@ mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri if (type != TC_SETUP_CLSFLOWER) return -EOPNOTSUPP; + mutex_lock(&mtk_flow_offload_mutex); switch (cls->command) { case FLOW_CLS_REPLACE: - return mtk_flow_offload_replace(eth, cls); + err = mtk_flow_offload_replace(eth, cls); + break; case FLOW_CLS_DESTROY: - return mtk_flow_offload_destroy(eth, cls); + err = mtk_flow_offload_destroy(eth, cls); + break; case FLOW_CLS_STATS: - return mtk_flow_offload_stats(eth, cls); + err = mtk_flow_offload_stats(eth, cls); + break; default: - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + break; } + mutex_unlock(&mtk_flow_offload_mutex); - return 0; + return err; } static int diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 8bde58379ac6..a1223e904190 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -50,7 +50,7 @@ mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o + ecpf.o rdma.o esw/legacy.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 4def64d0e669..a9166cd85013 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -188,12 +188,12 @@ static bool is_ib_supported(struct mlx5_core_dev *dev) } enum { - MLX5_INTERFACE_PROTOCOL_ETH_REP, MLX5_INTERFACE_PROTOCOL_ETH, + MLX5_INTERFACE_PROTOCOL_ETH_REP, + MLX5_INTERFACE_PROTOCOL_IB, MLX5_INTERFACE_PROTOCOL_IB_REP, MLX5_INTERFACE_PROTOCOL_MPIB, - MLX5_INTERFACE_PROTOCOL_IB, MLX5_INTERFACE_PROTOCOL_VNET, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 38c7c44fe883..44c458443428 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -246,6 +246,11 @@ static int mlx5_devlink_trap_action_set(struct devlink *devlink, struct mlx5_devlink_trap *dl_trap; int err = 0; + if (is_mdev_switchdev_mode(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode"); + return -EOPNOTSUPP; + } + dl_trap = mlx5_find_trap_by_id(dev, trap->id); if (!dl_trap) { mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id); @@ -456,6 +461,50 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id return 0; } + +static int mlx5_devlink_esw_port_metadata_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + return mlx5_esw_offloads_vport_metadata_set(dev->priv.eswitch, ctx->val.vbool); +} + +static int mlx5_devlink_esw_port_metadata_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch); + return 0; +} + +static int mlx5_devlink_esw_port_metadata_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 esw_mode; + + if (!MLX5_ESWITCH_MANAGER(dev)) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported"); + return -EOPNOTSUPP; + } + esw_mode = mlx5_eswitch_mode(dev); + if (esw_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch must either disabled or non switchdev mode"); + return -EBUSY; + } + return 0; +} + #endif static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, @@ -490,6 +539,12 @@ static const struct devlink_param mlx5_devlink_params[] = { BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_large_group_num_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_port_metadata_get, + mlx5_devlink_esw_port_metadata_set, + mlx5_devlink_esw_port_metadata_validate), #endif DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_enable_remote_dev_reset_get, @@ -519,6 +574,18 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) devlink_param_driverinit_value_set(devlink, MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, value); + + if (MLX5_ESWITCH_MANAGER(dev)) { + if (mlx5_esw_vport_match_metadata_supported(dev->priv.eswitch)) { + dev->priv.eswitch->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + value.vbool = true; + } else { + value.vbool = false; + } + devlink_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + value); + } #endif } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index eff107dad922..7318d44b774b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -10,6 +10,7 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, }; struct mlx5_trap_ctx { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b425b4a539bf..b636d63358d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -325,9 +325,9 @@ enum { MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, - MLX5E_SQ_STATE_TLS, MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, + MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, }; struct mlx5e_tx_mpwqe { @@ -500,6 +500,8 @@ struct mlx5e_xdpsq { struct mlx5e_channel *channel; } ____cacheline_aligned_in_smp; +struct mlx5e_ktls_resync_resp; + struct mlx5e_icosq { /* data path */ u16 cc; @@ -517,7 +519,9 @@ struct mlx5e_icosq { struct mlx5_wq_cyc wq; void __iomem *uar_map; u32 sqn; + u16 reserved_room; unsigned long state; + struct mlx5e_ktls_resync_resp *ktls_resync; /* control path */ struct mlx5_wq_ctrl wq_ctrl; @@ -1014,10 +1018,10 @@ int fn##_ctx(struct mlx5e_priv *priv, void *context) \ return fn(priv); \ } int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); -int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_preactivate preactivate, - void *context); +int mlx5e_safe_switch_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params, + mlx5e_fp_preactivate preactivate, + void *context, bool reset); int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index 765f3064689d..0dd7615e5931 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -55,12 +55,17 @@ void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) { struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); - devlink_port_unregister(dl_port); + if (dl_port->registered) + devlink_port_unregister(dl_port); } struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); + struct devlink_port *port; - return mlx5e_devlink_get_dl_port(priv); + port = mlx5e_devlink_get_dl_port(priv); + if (port->registered) + return port; + return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 84e501e057b4..6f4e6c34b2a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -128,7 +128,7 @@ int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg if (err) return err; - err = devlink_fmsg_u32_pair_put(fmsg, "size", eq->core.nent); + err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core)); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 7b2b52e75222..f410c1268422 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -6,6 +6,7 @@ #include "en/port.h" #include "en_accel/en_accel.h" #include "accel/ipsec.h" +#include "fpga/ipsec.h" static bool mlx5e_rx_is_xdp(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) @@ -89,30 +90,39 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, return !params->lro_en && linear_frag_sz <= PAGE_SIZE; } -#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ - MLX5_MPWQE_LOG_STRIDE_SZ_BASE) -bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + u8 log_stride_sz, u8 log_num_strides) { - u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); - s8 signed_log_num_strides_param; - u8 log_num_strides; + if (log_stride_sz + log_num_strides != MLX5_MPWRQ_LOG_WQE_SZ) + return false; - if (!mlx5e_rx_is_linear_skb(params, xsk)) + if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE || + log_stride_sz > MLX5_MPWQE_LOG_STRIDE_SZ_MAX) return false; - if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + if (log_num_strides > MLX5_MPWQE_LOG_NUM_STRIDES_MAX) return false; if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) - return true; + return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE; - log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); - signed_log_num_strides_param = - (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; + return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE; +} - return signed_log_num_strides_param >= 0; +bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + s8 log_num_strides; + u8 log_stride_sz; + + if (!mlx5e_rx_is_linear_skb(params, xsk)) + return false; + + log_stride_sz = order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); + log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz; + + return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, log_num_strides); } u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, @@ -282,7 +292,7 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) return false; - if (MLX5_IPSEC_DEV(mdev)) + if (mlx5_fpga_is_ipsec_device(mdev)) return false; if (params->xdp_prog) { @@ -364,7 +374,7 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, u32 buf_size = 0; int i; - if (MLX5_IPSEC_DEV(mdev)) + if (mlx5_fpga_is_ipsec_device(mdev)) byte_count += MLX5E_METADATA_ETHER_LEN; if (mlx5e_rx_is_linear_skb(params, xsk)) { @@ -461,26 +471,36 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } -void mlx5e_build_rq_param(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - u16 q_counter, - struct mlx5e_rq_param *param) +int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); int ndsegs = 1; switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { + u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + + if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, + log_wqe_num_of_strides)) { + mlx5_core_err(mdev, + "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u\n", + log_wqe_stride_size, log_wqe_num_of_strides); + return -EINVAL; + } + MLX5_SET(wq, wq, log_wqe_num_of_strides, - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); + log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); + log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); break; + } default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); @@ -498,6 +518,8 @@ void mlx5e_build_rq_param(struct mlx5_core_dev *mdev, param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); mlx5e_build_rx_cq_param(mdev, params, xsk, ¶m->cqp); + + return 0; } void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, @@ -621,6 +643,9 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, mlx5e_build_sq_param_common(mdev, param); param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */ + param->is_tls = mlx5_accel_is_ktls_rx(mdev); + if (param->is_tls) + param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); MLX5_SET(wq, wq, log_wq_sz, log_wq_size); mlx5e_build_ico_cq_param(mdev, log_wq_size, ¶m->cqp); @@ -639,14 +664,17 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } -void mlx5e_build_channel_param(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - u16 q_counter, - struct mlx5e_channel_param *cparam) +int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam) { u8 icosq_log_wq_sz, async_icosq_log_wq_sz; + int err; - mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq); + err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq); + if (err) + return err; icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev); @@ -655,4 +683,6 @@ void mlx5e_build_channel_param(struct mlx5_core_dev *mdev, mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq); mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq); + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 602e41a2bddd..e9593f5f0661 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -30,6 +30,7 @@ struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; bool is_mpw; + bool is_tls; u16 stop_room; }; @@ -95,6 +96,8 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *para void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + u8 log_stride_sz, u8 log_num_strides); u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, @@ -121,11 +124,11 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, /* Build queue parameters */ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c); -void mlx5e_build_rq_param(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - u16 q_counter, - struct mlx5e_rq_param *param); +int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_rq_param *param); void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, u16 q_counter, struct mlx5e_rq_param *param); @@ -140,10 +143,10 @@ void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_sq_param *param); -void mlx5e_build_channel_param(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - u16 q_counter, - struct mlx5e_channel_param *cparam); +int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam); u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 308fd279669e..89510cac46c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -387,21 +387,6 @@ enum mlx5e_fec_supported_link_mode { *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ } while (0) -#define MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(buf, policy, write, link) \ - do { \ - unsigned long policy_long; \ - u16 *__policy = &(policy); \ - bool _write = (write); \ - \ - policy_long = *__policy; \ - if (_write && *__policy) \ - *__policy = find_first_bit(&policy_long, \ - sizeof(policy_long) * BITS_PER_BYTE);\ - MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, *__policy, _write, link); \ - if (!_write && *__policy) \ - *__policy = 1 << *__policy; \ - } while (0) - /* get/set FEC admin field for a given speed */ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, enum mlx5e_fec_supported_link_mode link_mode) @@ -423,16 +408,16 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 50g_1x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g_1x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 100g_2x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_2x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 200g_4x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_4x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 400g_8x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x); break; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 72e7dd6d78c0..d907c1acd4d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -792,6 +792,9 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set) if (!priv->profile->rx_ptp_support) return 0; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + if (set) { if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) { netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index f9fdf3606bbd..0eb125316fe2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -323,10 +323,12 @@ static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq, struct mlx5e_priv *priv = rq->priv; struct mlx5e_params *params; u32 rq_stride, rq_sz; + bool real_time; int err; params = &priv->channels.params; rq_sz = mlx5e_rqwq_get_size(rq); + real_time = mlx5_is_real_time_rq(priv->mdev); rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); @@ -345,6 +347,10 @@ static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq, if (err) return err; + err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); + if (err) + return err; + err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 1a0505bd1e9a..9d361efd5ff7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -257,12 +257,14 @@ mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *txqsq) { u32 sq_stride, sq_sz; + bool real_time; int err; err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); if (err) return err; + real_time = mlx5_is_real_time_sq(txqsq->mdev); sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); sq_stride = MLX5_SEND_WQE_BB; @@ -274,6 +276,10 @@ mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, if (err) return err; + err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); + if (err) + return err; + err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 1c44000ad675..5da5e5323a44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -188,6 +188,28 @@ mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry) } static int +mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv, + u32 *labels, u32 *id) +{ + if (!memchr_inv(labels, 0, sizeof(u32) * 4)) { + *id = 0; + return 0; + } + + if (mapping_add(ct_priv->labels_mapping, labels, id)) + return -EOPNOTSUPP; + + return 0; +} + +static void +mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id) +{ + if (id) + mapping_remove(ct_priv->labels_mapping, id); +} + +static int mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) { struct flow_match_control control; @@ -438,7 +460,7 @@ mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr); mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, zone_rule->mh); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); kfree(attr); } @@ -641,8 +663,8 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, if (!meta) return -EOPNOTSUPP; - err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels, - &attr->ct_attr.ct_labels_id); + err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, + &attr->ct_attr.ct_labels_id); if (err) return -EOPNOTSUPP; if (nat) { @@ -679,7 +701,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, err_mapping: dealloc_mod_hdr_actions(&mod_acts); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); return err; } @@ -747,7 +769,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, err_rule: mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, zone_rule->mh); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); err_mod_hdr: kfree(attr); err_attr: @@ -1199,7 +1221,7 @@ void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_ if (!priv || !ct_attr->ct_labels_id) return; - mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id); + mlx5_put_label_mapping(priv, ct_attr->ct_labels_id); } int @@ -1302,7 +1324,7 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; - if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) + if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id)) return -EOPNOTSUPP; mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, MLX5_CT_LABELS_MASK); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 89d5ca91566e..9350ca05ce65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -21,6 +21,11 @@ enum { MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, }; +struct mlx5e_encap_key { + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; +}; + struct mlx5e_tc_tunnel { int tunnel_type; enum mlx5_flow_match_level match_level; @@ -44,6 +49,8 @@ struct mlx5e_tc_tunnel { struct flow_cls_offload *f, void *headers_c, void *headers_v); + bool (*encap_info_equal)(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); }; extern struct mlx5e_tc_tunnel vxlan_tunnel; @@ -103,6 +110,9 @@ int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, void *headers_c, void *headers_v); +bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); + #endif /* CONFIG_MLX5_ESWITCH */ #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 01d435e15ad3..593503bc4d07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -477,16 +477,11 @@ void mlx5e_detach_decap(struct mlx5e_priv *priv, mlx5e_decap_dealloc(priv, d); } -struct encap_key { - const struct ip_tunnel_key *ip_tun_key; - struct mlx5e_tc_tunnel *tc_tunnel; -}; - -static int cmp_encap_info(struct encap_key *a, - struct encap_key *b) +bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) { - return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 && + a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type; } static int cmp_decap_info(struct mlx5e_decap_key *a, @@ -495,7 +490,7 @@ static int cmp_decap_info(struct mlx5e_decap_key *a, return memcmp(&a->key, &b->key, sizeof(b->key)); } -static int hash_encap_info(struct encap_key *key) +static int hash_encap_info(struct mlx5e_encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), key->tc_tunnel->tunnel_type); @@ -517,18 +512,18 @@ static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) } static struct mlx5e_encap_entry * -mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, +mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key, uintptr_t hash_key) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_key e_key; struct mlx5e_encap_entry *e; - struct encap_key e_key; hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { e_key.ip_tun_key = &e->tun_info->key; e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, key) && + if (e->tunnel->encap_info_equal(&e_key, key) && mlx5e_encap_take(e)) return e; } @@ -695,8 +690,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; unsigned long tbl_time_before = 0; - struct encap_key key; struct mlx5e_encap_entry *e; + struct mlx5e_encap_key key; bool entry_created = false; unsigned short family; uintptr_t hash_key; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index 7ed3f9f79f11..f5b26f5a7de4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -329,6 +329,34 @@ static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, return mlx5e_tc_tun_parse_geneve_options(priv, spec, f); } +static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) +{ + struct ip_tunnel_info *a_info; + struct ip_tunnel_info *b_info; + bool a_has_opts, b_has_opts; + + if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) + return false; + + a_has_opts = !!(a->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); + b_has_opts = !!(b->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); + + /* keys are equal when both don't have any options attached */ + if (!a_has_opts && !b_has_opts) + return true; + + if (a_has_opts != b_has_opts) + return false; + + /* geneve options stored in memory next to ip_tunnel_info struct */ + a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key); + b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key); + + return a_info->options_len == b_info->options_len && + memcmp(a_info + 1, b_info + 1, a_info->options_len) == 0; +} + struct mlx5e_tc_tunnel geneve_tunnel = { .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE, .match_level = MLX5_MATCH_L4, @@ -338,4 +366,5 @@ struct mlx5e_tc_tunnel geneve_tunnel = { .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve, .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve, .parse_tunnel = mlx5e_tc_tun_parse_geneve, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_geneve, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c index 2805416c32a3..ada14f0574dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -94,4 +94,5 @@ struct mlx5e_tc_tunnel gre_tunnel = { .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap, .parse_udp_ports = NULL, .parse_tunnel = mlx5e_tc_tun_parse_gretap, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c index 3479672e84cf..60952b33b568 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -131,4 +131,5 @@ struct mlx5e_tc_tunnel mplsoudp_tunnel = { .generate_ip_tun_hdr = generate_ip_tun_hdr, .parse_udp_ports = parse_udp_ports, .parse_tunnel = parse_tunnel, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index 038a0f1cecec..4267f3a1059e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -150,4 +150,5 @@ struct mlx5e_tc_tunnel vxlan_tunnel = { .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, .parse_tunnel = mlx5e_tc_tun_parse_vxlan, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 2371b83dad9c..055c3bc23733 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -441,4 +441,10 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) return wqe_size * 2 - 1; } +static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size) +{ + u16 room = sq->reserved_room + mlx5e_stop_room_for_wqe(wqe_size); + + return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room); +} #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index cc0efac7b812..00af0b831a28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -123,11 +123,10 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, mlx5e_udp_gso_handle_tx_skb(skb); #ifdef CONFIG_MLX5_EN_TLS - if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) { - /* May send SKBs and WQEs. */ + /* May send SKBs and WQEs. */ + if (mlx5e_tls_skb_offloaded(skb)) if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls))) return false; - } #endif #ifdef CONFIG_MLX5_EN_IPSEC @@ -186,7 +185,7 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, struct mlx5_wqe_inline_seg *inlseg) { #ifdef CONFIG_MLX5_EN_TLS - mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls); + mlx5e_tls_handle_tx_wqe(&wqe->ctrl, &state->tls); #endif #ifdef CONFIG_MLX5_EN_IPSEC diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index baa58b62e8df..aaa579bf9a39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -12,6 +12,9 @@ void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv); int mlx5e_ktls_init_rx(struct mlx5e_priv *priv); void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv); int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable); +struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void); +void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list); #else static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) @@ -33,6 +36,14 @@ static inline int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enab return -EOPNOTSUPP; } +static inline struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {} #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index f7c880edae37..4e58fade7a60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -46,7 +46,8 @@ struct mlx5e_ktls_offload_context_rx { struct tls12_crypto_info_aes_gcm_128 crypto_info; struct accel_rule rule; struct sock *sk; - struct mlx5e_rq_stats *stats; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_tls_sw_stats *sw_stats; struct completion add_ctx; u32 tirn; u32 key_id; @@ -55,6 +56,7 @@ struct mlx5e_ktls_offload_context_rx { /* resync */ struct mlx5e_ktls_rx_resync_ctx resync; + struct list_head list; }; static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx) @@ -71,6 +73,32 @@ static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx refcount_inc(&priv_rx->resync.refcnt); } +struct mlx5e_ktls_resync_resp { + /* protects list changes */ + spinlock_t lock; + struct list_head list; +}; + +void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) +{ + kvfree(resp_list); +} + +struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void) +{ + struct mlx5e_ktls_resync_resp *resp_list; + + resp_list = kvzalloc(sizeof(*resp_list), GFP_KERNEL); + if (!resp_list) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&resp_list->list); + spin_lock_init(&resp_list->lock); + + return resp_list; +} + static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn) { int err, inlen; @@ -118,8 +146,7 @@ out: complete(&priv_rx->add_ctx); } -static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv, - struct sock *sk) +static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv) { INIT_WORK(&rule->work, accel_rule_handle_work); rule->priv = priv; @@ -137,11 +164,10 @@ post_static_params(struct mlx5e_icosq *sq, { struct mlx5e_set_tls_static_params_wqe *wqe; struct mlx5e_icosq_wqe_info wi; - u16 pi, num_wqebbs, room; + u16 pi, num_wqebbs; num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS; - room = mlx5e_stop_room_for_wqe(num_wqebbs); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room))) + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) return ERR_PTR(-ENOSPC); pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); @@ -168,11 +194,10 @@ post_progress_params(struct mlx5e_icosq *sq, { struct mlx5e_set_tls_progress_params_wqe *wqe; struct mlx5e_icosq_wqe_info wi; - u16 pi, num_wqebbs, room; + u16 pi, num_wqebbs; num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS; - room = mlx5e_stop_room_for_wqe(num_wqebbs); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room))) + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) return ERR_PTR(-ENOSPC); pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); @@ -218,7 +243,7 @@ unlock: return err; err_out: - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; err = PTR_ERR(cseg); complete(&priv_rx->add_ctx); goto unlock; @@ -277,17 +302,15 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, buf->priv_rx = priv_rx; - BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1); - spin_lock_bh(&sq->channel->async_icosq_lock); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) { spin_unlock_bh(&sq->channel->async_icosq_lock); err = -ENOSPC; goto err_dma_unmap; } - pi = mlx5e_icosq_get_next_pi(sq, 1); + pi = mlx5e_icosq_get_next_pi(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS); wqe = MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi); #define GET_PSV_DS_CNT (DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)) @@ -307,7 +330,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, wi = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_GET_PSV_TLS, - .num_wqebbs = 1, + .num_wqebbs = MLX5E_KTLS_GET_PROGRESS_WQEBBS, .tls_get_params.buf = buf, }; icosq_fill_wi(sq, pi, &wi); @@ -322,7 +345,7 @@ err_dma_unmap: err_free: kfree(buf); err_out: - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; return err; } @@ -362,33 +385,32 @@ static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, /* Function can be called with the refcount being either elevated or not. * It does not affect the refcount. */ -static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx, - struct mlx5e_channel *c) +static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx, + struct mlx5e_channel *c) { struct tls12_crypto_info_aes_gcm_128 *info = &priv_rx->crypto_info; - struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_ktls_resync_resp *ktls_resync; struct mlx5e_icosq *sq; - int err; + bool trigger_poll; memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq)); - err = 0; sq = &c->async_icosq; - spin_lock_bh(&c->async_icosq_lock); + ktls_resync = sq->ktls_resync; - cseg = post_static_params(sq, priv_rx); - if (IS_ERR(cseg)) { - priv_rx->stats->tls_resync_res_skip++; - err = PTR_ERR(cseg); - goto unlock; - } - /* Do not increment priv_rx refcnt, CQE handling is empty */ - mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); - priv_rx->stats->tls_resync_res_ok++; -unlock: - spin_unlock_bh(&c->async_icosq_lock); + spin_lock_bh(&ktls_resync->lock); + list_add_tail(&priv_rx->list, &ktls_resync->list); + trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock_bh(&ktls_resync->lock); - return err; + if (!trigger_poll) + return; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock_bh(&c->async_icosq_lock); + mlx5e_trigger_irq(sq); + spin_unlock_bh(&c->async_icosq_lock); + } } /* Function can be called with the refcount being either elevated or not. @@ -420,13 +442,13 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, auth_state = MLX5_GET(tls_progress_params, ctx, auth_state); if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; goto out; } hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); - priv_rx->stats->tls_resync_req_end++; + priv_rx->rq_stats->tls_resync_req_end++; out: mlx5e_ktls_priv_rx_put(priv_rx); dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@ -609,7 +631,8 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->rxq = rxq; priv_rx->sk = sk; - priv_rx->stats = &priv->channel_stats[rxq].rq; + priv_rx->rq_stats = &priv->channel_stats[rxq].rq; + priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); rqtn = priv->direct_tir[rxq].rqt.rqtn; @@ -620,7 +643,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, init_completion(&priv_rx->add_ctx); - accel_rule_init(&priv_rx->rule, priv, sk); + accel_rule_init(&priv_rx->rule, priv); resync = &priv_rx->resync; resync_init(resync, priv); tls_offload_ctx_rx(tls_ctx)->resync_async = &resync->core; @@ -630,7 +653,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, if (err) goto err_post_wqes; - priv_rx->stats->tls_ctx++; + atomic64_inc(&priv_rx->sw_stats->rx_tls_ctx); return 0; @@ -666,7 +689,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) if (cancel_work_sync(&resync->work)) mlx5e_ktls_priv_rx_put(priv_rx); - priv_rx->stats->tls_del++; + atomic64_inc(&priv_rx->sw_stats->rx_tls_del); if (priv_rx->rule.rule) mlx5e_accel_fs_del_sk(priv_rx->rule.rule); @@ -678,3 +701,65 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) */ mlx5e_ktls_priv_rx_put(priv_rx); } + +bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx, *tmp; + struct mlx5e_ktls_resync_resp *ktls_resync; + struct mlx5_wqe_ctrl_seg *db_cseg; + struct mlx5e_icosq *sq; + LIST_HEAD(local_list); + int i, j; + + sq = &c->async_icosq; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + ktls_resync = sq->ktls_resync; + db_cseg = NULL; + i = 0; + + spin_lock(&ktls_resync->lock); + list_for_each_entry_safe(priv_rx, tmp, &ktls_resync->list, list) { + list_move(&priv_rx->list, &local_list); + if (++i == budget) + break; + } + if (list_empty(&ktls_resync->list)) + clear_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock(&ktls_resync->lock); + + spin_lock(&c->async_icosq_lock); + for (j = 0; j < i; j++) { + struct mlx5_wqe_ctrl_seg *cseg; + + priv_rx = list_first_entry(&local_list, + struct mlx5e_ktls_offload_context_rx, + list); + cseg = post_static_params(sq, priv_rx); + if (IS_ERR(cseg)) + break; + list_del(&priv_rx->list); + db_cseg = cseg; + } + if (db_cseg) + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, db_cseg); + spin_unlock(&c->async_icosq_lock); + + priv_rx->rq_stats->tls_resync_res_ok += j; + + if (!list_empty(&local_list)) { + /* This happens only if ICOSQ is full. + * There is no need to mark busy or explicitly ask for a NAPI cycle, + * it will be triggered by the outstanding ICOSQ completions. + */ + spin_lock(&ktls_resync->lock); + list_splice(&local_list, &ktls_resync->list); + set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock(&ktls_resync->lock); + priv_rx->rq_stats->tls_resync_res_retry++; + } + + return i == budget; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index d16def68ecff..51bdf71073f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2019 Mellanox Technologies. +#include "en_accel/tls.h" #include "en_accel/ktls_txrx.h" #include "en_accel/ktls_utils.h" @@ -50,6 +51,7 @@ static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) struct mlx5e_ktls_offload_context_tx { struct tls_offload_context_tx *tx_ctx; struct tls12_crypto_info_aes_gcm_128 crypto_info; + struct mlx5e_tls_sw_stats *sw_stats; u32 expected_seq; u32 tisn; u32 key_id; @@ -99,6 +101,7 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, if (err) goto err_create_key; + priv_tx->sw_stats = &priv->tls->sw_stats; priv_tx->expected_seq = start_offload_tcp_sn; priv_tx->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; @@ -111,6 +114,7 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, goto err_create_tis; priv_tx->ctx_post_pending = true; + atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx); return 0; @@ -452,7 +456,6 @@ bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *s if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); - stats->tls_ctx++; } seq = ntohl(tcp_hdr(skb)->seq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index ee04e916fa21..8f79335057dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -40,6 +40,14 @@ mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, } return false; } + +bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget); + +static inline bool +mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) +{ + return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state); +} #else static inline bool mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, @@ -49,6 +57,18 @@ mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, return false; } +static inline bool +mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) +{ + return false; +} + +static inline bool +mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) +{ + return false; +} + #endif /* CONFIG_MLX5_EN_TLS */ #endif /* __MLX5E_TLS_TXRX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h index bd270a85c804..4c9274d390da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -41,10 +41,13 @@ #include "en.h" struct mlx5e_tls_sw_stats { + atomic64_t tx_tls_ctx; atomic64_t tx_tls_drop_metadata; atomic64_t tx_tls_drop_resync_alloc; atomic64_t tx_tls_drop_no_sync_data; atomic64_t tx_tls_drop_bypass_required; + atomic64_t rx_tls_ctx; + atomic64_t rx_tls_del; atomic64_t rx_tls_drop_resync_request; atomic64_t rx_tls_resync_request; atomic64_t rx_tls_resync_reply; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 2b51d3222ca1..82dc09aaa7fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -263,9 +263,6 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, int datalen; u32 skb_seq; - if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) - return true; - datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); if (!datalen) return true; @@ -301,12 +298,6 @@ err_out: return false; } -void mlx5e_tls_handle_tx_wqe(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg, - struct mlx5e_accel_tx_tls_state *state) -{ - cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); -} - static int tls_update_resync_sn(struct net_device *netdev, struct sk_buff *skb, struct mlx5e_tls_metadata *mdata) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h index 9923132c9440..0ca0a023fb8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -47,8 +47,18 @@ u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state); -void mlx5e_tls_handle_tx_wqe(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg, - struct mlx5e_accel_tx_tls_state *state); + +static inline bool mlx5e_tls_skb_offloaded(struct sk_buff *skb) +{ + return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); +} + +static inline void +mlx5e_tls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, + struct mlx5e_accel_tx_tls_state *state) +{ + cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); +} void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, u32 *cqe_bcnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c index b949b9a7538b..29463bdb7715 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c @@ -45,49 +45,60 @@ static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) }, }; +static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) }, +}; + #define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) -#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc) - -static bool is_tls_atomic_stats(struct mlx5e_priv *priv) +static const struct counter_desc *get_tls_atomic_stats(struct mlx5e_priv *priv) { - return priv->tls && !mlx5_accel_is_ktls_device(priv->mdev); + if (!priv->tls) + return NULL; + if (mlx5_accel_is_ktls_device(priv->mdev)) + return mlx5e_ktls_sw_stats_desc; + return mlx5e_tls_sw_stats_desc; } int mlx5e_tls_get_count(struct mlx5e_priv *priv) { - if (!is_tls_atomic_stats(priv)) + if (!priv->tls) return 0; - - return NUM_TLS_SW_COUNTERS; + if (mlx5_accel_is_ktls_device(priv->mdev)) + return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); + return ARRAY_SIZE(mlx5e_tls_sw_stats_desc); } int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { - unsigned int i, idx = 0; + const struct counter_desc *stats_desc; + unsigned int i, n, idx = 0; - if (!is_tls_atomic_stats(priv)) - return 0; + stats_desc = get_tls_atomic_stats(priv); + n = mlx5e_tls_get_count(priv); - for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) + for (i = 0; i < n; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - mlx5e_tls_sw_stats_desc[i].format); + stats_desc[i].format); - return NUM_TLS_SW_COUNTERS; + return n; } int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { - int i, idx = 0; + const struct counter_desc *stats_desc; + unsigned int i, n, idx = 0; - if (!is_tls_atomic_stats(priv)) - return 0; + stats_desc = get_tls_atomic_stats(priv); + n = mlx5e_tls_get_count(priv); - for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) + for (i = 0; i < n; i++) data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, - mlx5e_tls_sw_stats_desc, i); + stats_desc, i); - return NUM_TLS_SW_COUNTERS; + return n; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index d5b1eb74d5e5..5cd466ec6492 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -392,11 +392,11 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) { struct arfs_rule *arfs_rule; struct hlist_node *htmp; + HLIST_HEAD(del_list); int quota = 0; int i; int j; - HLIST_HEAD(del_list); spin_lock_bh(&priv->fs.arfs->arfs_lock); mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs->arfs_tables, i, j) { if (!work_pending(&arfs_rule->arfs_work) && @@ -422,10 +422,10 @@ static void arfs_del_rules(struct mlx5e_priv *priv) { struct hlist_node *htmp; struct arfs_rule *rule; + HLIST_HEAD(del_list); int i; int j; - HLIST_HEAD(del_list); spin_lock_bh(&priv->fs.arfs->arfs_lock); mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs->arfs_tables, i, j) { hlist_del_init(&rule->hlist); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index f23c67575073..a4c8d8d00d5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1149,35 +1149,23 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context) static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) { - struct mlx5e_channels new_channels = {}; - bool reset_channels = true; - bool opened; - int err = 0; + struct mlx5e_params new_params; + bool reset = true; + int err; mutex_lock(&priv->state_lock); - new_channels.params = priv->channels.params; - mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_channels.params, + new_params = priv->channels.params; + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_params, trust_state); - opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (!opened) - reset_channels = false; - /* Skip if tx_min_inline is the same */ - if (new_channels.params.tx_min_inline_mode == - priv->channels.params.tx_min_inline_mode) - reset_channels = false; - - if (reset_channels) { - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_update_trust_state_hw, - &trust_state); - } else { - err = mlx5e_update_trust_state_hw(priv, &trust_state); - if (!err && !opened) - priv->channels.params = new_channels.params; - } + if (new_params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) + reset = false; + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_update_trust_state_hw, + &trust_state, reset); mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 964558086ad6..8360289813f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -326,7 +326,7 @@ static void mlx5e_get_ringparam(struct net_device *dev, int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, struct ethtool_ringparam *param) { - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; u8 log_rq_size; u8 log_sq_size; int err = 0; @@ -365,20 +365,15 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); - new_channels.params = priv->channels.params; - new_channels.params.log_rq_mtu_frames = log_rq_size; - new_channels.params.log_sq_size = log_sq_size; + new_params = priv->channels.params; + new_params.log_rq_mtu_frames = log_rq_size; + new_params.log_sq_size = log_sq_size; - err = mlx5e_validate_params(priv->mdev, &new_channels.params); + err = mlx5e_validate_params(priv->mdev, &new_params); if (err) goto unlock; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - goto unlock; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); unlock: mutex_unlock(&priv->state_lock); @@ -423,8 +418,9 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, { struct mlx5e_params *cur_params = &priv->channels.params; unsigned int count = ch->combined_count; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; bool arfs_enabled; + bool opened; int err = 0; if (!count) { @@ -459,28 +455,18 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } - new_channels.params = *cur_params; - new_channels.params.num_channels = count; - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - struct mlx5e_params old_params; + new_params = *cur_params; + new_params.num_channels = count; - old_params = *cur_params; - *cur_params = new_channels.params; - err = mlx5e_num_channels_changed(priv); - if (err) - *cur_params = old_params; - - goto out; - } + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE; + arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE); if (arfs_enabled) mlx5e_arfs_disable(priv); /* Switch to new channels, set new parameters and close old ones */ - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_num_channels_changed_ctx, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); if (arfs_enabled) { int err2 = mlx5e_arfs_enable(priv); @@ -575,8 +561,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, { struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; bool reset_rx, reset_tx; + bool reset = true; int err = 0; if (!MLX5_CAP_GEN(mdev, cq_moderation)) @@ -597,51 +584,47 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, } mutex_lock(&priv->state_lock); - new_channels.params = priv->channels.params; + new_params = priv->channels.params; - rx_moder = &new_channels.params.rx_cq_moderation; + rx_moder = &new_params.rx_cq_moderation; rx_moder->usec = coal->rx_coalesce_usecs; rx_moder->pkts = coal->rx_max_coalesced_frames; - new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + new_params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; - tx_moder = &new_channels.params.tx_cq_moderation; + tx_moder = &new_params.tx_cq_moderation; tx_moder->usec = coal->tx_coalesce_usecs; tx_moder->pkts = coal->tx_max_coalesced_frames; - new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; + new_params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; if (reset_rx) { - u8 mode = MLX5E_GET_PFLAG(&new_channels.params, + u8 mode = MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_BASED_MODER); - mlx5e_reset_rx_moderation(&new_channels.params, mode); + mlx5e_reset_rx_moderation(&new_params, mode); } if (reset_tx) { - u8 mode = MLX5E_GET_PFLAG(&new_channels.params, + u8 mode = MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_TX_CQE_BASED_MODER); - mlx5e_reset_tx_moderation(&new_channels.params, mode); + mlx5e_reset_tx_moderation(&new_params, mode); } - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - goto out; - } - - if (!reset_rx && !reset_tx) { + /* If DIM state hasn't changed, it's possible to modify interrupt + * moderation parameters on the fly, even if the channels are open. + */ + if (!reset_rx && !reset_tx && test_bit(MLX5E_STATE_OPENED, &priv->state)) { if (!coal->use_adaptive_rx_coalesce) mlx5e_set_priv_channels_rx_coalesce(priv, coal); if (!coal->use_adaptive_tx_coalesce) mlx5e_set_priv_channels_tx_coalesce(priv, coal); - priv->channels.params = new_channels.params; - goto out; + reset = false; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); -out: mutex_unlock(&priv->state_lock); return err; } @@ -759,11 +742,11 @@ static int get_fec_supported_advertised(struct mlx5_core_dev *dev, return 0; } -static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, - u32 eth_proto_cap, - u8 connector_type, bool ext) +static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev, + struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap, u8 connector_type) { - if ((!connector_type && !ext) || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) { + if (!MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) { if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) @@ -899,9 +882,9 @@ static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = { [MLX5E_PORT_OTHER] = PORT_OTHER, }; -static u8 get_connector_port(u32 eth_proto, u8 connector_type, bool ext) +static u8 get_connector_port(struct mlx5_core_dev *mdev, u32 eth_proto, u8 connector_type) { - if ((connector_type || ext) && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) return ptys2connector_type[connector_type]; if (eth_proto & @@ -1002,11 +985,11 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, data_rate_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; - - link_ksettings->base.port = get_connector_port(eth_proto_oper, - connector_type, ext); - ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, - connector_type, ext); + connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ? + connector_type : MLX5E_PORT_UNKNOWN; + link_ksettings->base.port = get_connector_port(mdev, eth_proto_oper, connector_type); + ptys2ethtool_supported_advertised_port(mdev, link_ksettings, eth_proto_admin, + connector_type); get_lp_advertising(mdev, eth_proto_lp, link_ksettings); if (an_status == MLX5_AN_COMPLETE) @@ -1602,6 +1585,14 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static void mlx5e_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *fec_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_fec_get(priv, fec_stats); +} + static int mlx5e_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) { @@ -1770,6 +1761,49 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, return 0; } +static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_module_eeprom_query_params query; + struct mlx5_core_dev *mdev = priv->mdev; + u8 *data = page_data->data; + int size_read; + int i = 0; + + if (!page_data->length) + return -EINVAL; + + memset(data, 0, page_data->length); + + query.offset = page_data->offset; + query.i2c_address = page_data->i2c_address; + query.bank = page_data->bank; + query.page = page_data->page; + while (i < page_data->length) { + query.size = page_data->length - i; + size_read = mlx5_query_module_eeprom_by_page(mdev, &query, data + i); + + /* Done reading, return how many bytes was read */ + if (!size_read) + return i; + + if (size_read == -EINVAL) + return -EINVAL; + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_module_eeprom_by_page failed:0x%x\n", + __func__, size_read); + return i; + } + + i += size_read; + query.offset += size_read; + } + + return i; +} + int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash) { @@ -1809,7 +1843,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; bool mode_changed; u8 cq_period_mode, current_cq_period_mode; @@ -1828,18 +1862,13 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, if (!mode_changed) return 0; - new_channels.params = priv->channels.params; + new_params = priv->channels.params; if (is_rx_cq) - mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode); + mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); else - mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - return 0; - } + mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); - return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); } static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) @@ -1855,7 +1884,7 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) { bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; int err = 0; if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) @@ -1864,21 +1893,16 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (curr_val == new_val) return 0; - new_channels.params = priv->channels.params; - MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + new_params = priv->channels.params; + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) - new_channels.params.ptp_rx = new_val; + new_params.ptp_rx = new_val; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - return 0; - } - - if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx) - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + if (new_params.ptp_rx == priv->channels.params.ptp_rx) + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); else - err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx, - &new_channels.params.ptp_rx); + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, + &new_params.ptp_rx, true); if (err) return err; @@ -1912,7 +1936,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; if (enable) { if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) @@ -1924,17 +1948,12 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return -EINVAL; } - new_channels.params = priv->channels.params; - - MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); - mlx5e_set_rq_type(mdev, &new_channels.params); + new_params = priv->channels.params; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - return 0; - } + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); + mlx5e_set_rq_type(mdev, &new_params); - return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); } static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) @@ -1963,23 +1982,16 @@ static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool e { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; - int err; + struct mlx5e_params new_params; if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) return -EOPNOTSUPP; - new_channels.params = priv->channels.params; + new_params = priv->channels.params; - MLX5E_SET_PFLAG(&new_channels.params, flag, enable); + MLX5E_SET_PFLAG(&new_params, flag, enable); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - return 0; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); - return err; + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); } static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) @@ -1996,7 +2008,7 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; int err; if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) @@ -2012,27 +2024,15 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) return -EINVAL; } - new_channels.params = priv->channels.params; - MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable); + new_params = priv->channels.params; + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_PORT_TS, enable); /* No need to verify SQ stop room as * ptpsq.txqsq.stop_room <= generic_sq->stop_room, and both * has the same log_sq_size. */ - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - struct mlx5e_params old_params; - - old_params = priv->channels.params; - priv->channels.params = new_channels.params; - err = mlx5e_num_channels_changed(priv); - if (err) - priv->channels.params = old_params; - goto out; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_num_channels_changed_ctx, NULL); -out: + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); if (!err) priv->tx_ptp_opened = true; @@ -2125,12 +2125,216 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return mlx5e_ethtool_set_rxnfc(dev, cmd); } +static int query_port_status_opcode(struct mlx5_core_dev *mdev, u32 *status_opcode) +{ + struct mlx5_ifc_pddr_troubleshooting_page_bits *pddr_troubleshooting_page; + u32 in[MLX5_ST_SZ_DW(pddr_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(pddr_reg)]; + int err; + + MLX5_SET(pddr_reg, in, local_port, 1); + MLX5_SET(pddr_reg, in, page_select, + MLX5_PDDR_REG_PAGE_SELECT_TROUBLESHOOTING_INFO_PAGE); + + pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, in, page_data); + MLX5_SET(pddr_troubleshooting_page, pddr_troubleshooting_page, + group_opcode, MLX5_PDDR_REG_TRBLSH_GROUP_OPCODE_MONITOR); + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PDDR, 0, 0); + if (err) + return err; + + pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, out, page_data); + *status_opcode = MLX5_GET(pddr_troubleshooting_page, pddr_troubleshooting_page, + status_opcode); + return 0; +} + +struct mlx5e_ethtool_link_ext_state_opcode_mapping { + u32 status_opcode; + enum ethtool_link_ext_state link_ext_state; + u8 link_ext_substate; +}; + +static const struct mlx5e_ethtool_link_ext_state_opcode_mapping +mlx5e_link_ext_state_opcode_map[] = { + /* States relating to the autonegotiation or issues therein */ + {2, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED}, + {3, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED}, + {4, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED}, + {36, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE}, + {38, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE}, + {39, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD}, + + /* Failure during link training */ + {5, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED}, + {6, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT}, + {7, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY}, + {8, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, 0}, + {14, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT}, + + /* Logical mismatch in physical coding sublayer or forward error correction sublayer */ + {9, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK}, + {10, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK}, + {11, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS}, + {12, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED}, + {13, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED}, + + /* Signal integrity issues */ + {15, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, 0}, + {17, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS}, + {42, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE}, + + /* No cable connected */ + {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0}, + + /* Failure is related to cable, e.g., unsupported cable */ + {16, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {20, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {29, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1029, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1031, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, 0}, + + /* Failure is related to EEPROM, e.g., failure during reading or parsing the data */ + {1027, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0}, + + /* Failure during calibration algorithm */ + {23, ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, 0}, + + /* The hardware is not able to provide the power required from cable or module */ + {1032, ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, 0}, + + /* The module is overheated */ + {1030, ETHTOOL_LINK_EXT_STATE_OVERHEAT, 0}, +}; + +static void +mlx5e_set_link_ext_state(struct mlx5e_ethtool_link_ext_state_opcode_mapping + link_ext_state_mapping, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + switch (link_ext_state_mapping.link_ext_state) { + case ETHTOOL_LINK_EXT_STATE_AUTONEG: + link_ext_state_info->autoneg = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE: + link_ext_state_info->link_training = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH: + link_ext_state_info->link_logical_mismatch = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY: + link_ext_state_info->bad_signal_integrity = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE: + link_ext_state_info->cable_issue = + link_ext_state_mapping.link_ext_substate; + break; + default: + break; + } + + link_ext_state_info->link_ext_state = link_ext_state_mapping.link_ext_state; +} + +static int +mlx5e_get_link_ext_state(struct net_device *dev, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + struct mlx5e_ethtool_link_ext_state_opcode_mapping link_ext_state_mapping; + struct mlx5e_priv *priv = netdev_priv(dev); + u32 status_opcode = 0; + int i; + + /* Exit without data if the interface state is OK, since no extended data is + * available in such case + */ + if (netif_carrier_ok(dev)) + return -ENODATA; + + if (query_port_status_opcode(priv->mdev, &status_opcode) || + !status_opcode) + return -ENODATA; + + for (i = 0; i < ARRAY_SIZE(mlx5e_link_ext_state_opcode_map); i++) { + link_ext_state_mapping = mlx5e_link_ext_state_opcode_map[i]; + if (link_ext_state_mapping.status_opcode == status_opcode) { + mlx5e_set_link_ext_state(link_ext_state_mapping, + link_ext_state_info); + return 0; + } + } + + return -ENODATA; +} + +static void mlx5e_get_eth_phy_stats(struct net_device *netdev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_phy_get(priv, phy_stats); +} + +static void mlx5e_get_eth_mac_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_mac_get(priv, mac_stats); +} + +static void mlx5e_get_eth_ctrl_stats(struct net_device *netdev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_ctrl_get(priv, ctrl_stats); +} + +static void mlx5e_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_rmon_get(priv, rmon_stats, ranges); +} + const struct ethtool_ops mlx5e_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, + .get_link_ext_state = mlx5e_get_link_ext_state, .get_strings = mlx5e_get_strings, .get_sset_count = mlx5e_get_sset_count, .get_ethtool_stats = mlx5e_get_ethtool_stats, @@ -2159,12 +2363,18 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_wol = mlx5e_set_wol, .get_module_info = mlx5e_get_module_info, .get_module_eeprom = mlx5e_get_module_eeprom, + .get_module_eeprom_by_page = mlx5e_get_module_eeprom_by_page, .flash_device = mlx5e_flash_device, .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, .get_msglevel = mlx5e_get_msglevel, .set_msglevel = mlx5e_set_msglevel, + .get_fec_stats = mlx5e_get_fec_stats, .get_fecparam = mlx5e_get_fecparam, .set_fecparam = mlx5e_set_fecparam, + .get_eth_phy_stats = mlx5e_get_eth_phy_stats, + .get_eth_mac_stats = mlx5e_get_eth_mac_stats, + .get_eth_ctrl_stats = mlx5e_get_eth_ctrl_stats, + .get_rmon_stats = mlx5e_get_rmon_stats, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 773449c1424b..bca832cdc4cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -510,8 +510,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->page_pool = NULL; goto err_free_by_rq_type; } - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_POOL, rq->page_pool); + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); } if (err) goto err_free_by_rq_type; @@ -1041,6 +1042,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->reserved_room = param->stop_room; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1132,8 +1134,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - if (mlx5_accel_is_tls_device(c->priv->mdev)) - set_bit(MLX5E_SQ_STATE_TLS, &sq->state); if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); sq->stop_room = param->stop_room; @@ -1409,8 +1409,17 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, if (err) goto err_free_icosq; + if (param->is_tls) { + sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list(); + if (IS_ERR(sq->ktls_resync)) { + err = PTR_ERR(sq->ktls_resync); + goto err_destroy_icosq; + } + } return 0; +err_destroy_icosq: + mlx5e_destroy_sq(c->mdev, sq->sqn); err_free_icosq: mlx5e_free_icosq(sq); @@ -1432,6 +1441,8 @@ void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; + if (sq->ktls_resync) + mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync); mlx5e_destroy_sq(c->mdev, sq->sqn); mlx5e_free_icosq_descs(sq); mlx5e_free_icosq(sq); @@ -2075,7 +2086,10 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, if (!chs->c || !cparam) goto err_free; - mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam); + err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam); + if (err) + goto err_free; + for (i = 0; i < chs->num; i++) { struct xsk_buff_pool *xsk_pool = NULL; @@ -2833,6 +2847,29 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) mlx5e_deactivate_channels(&priv->channels); } +static int mlx5e_switch_priv_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params, + mlx5e_fp_preactivate preactivate, + void *context) +{ + struct mlx5e_params old_params; + + old_params = priv->channels.params; + priv->channels.params = *new_params; + + if (preactivate) { + int err; + + err = preactivate(priv, context); + if (err) { + priv->channels.params = old_params; + return err; + } + } + + return 0; +} + static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, mlx5e_fp_preactivate preactivate, @@ -2875,35 +2912,32 @@ out: return err; } -int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_preactivate preactivate, - void *context) +int mlx5e_safe_switch_params(struct mlx5e_priv *priv, + struct mlx5e_params *params, + mlx5e_fp_preactivate preactivate, + void *context, bool reset) { + struct mlx5e_channels new_chs = {}; int err; - err = mlx5e_open_channels(priv, new_chs); + reset &= test_bit(MLX5E_STATE_OPENED, &priv->state); + if (!reset) + return mlx5e_switch_priv_params(priv, params, preactivate, context); + + new_chs.params = *params; + err = mlx5e_open_channels(priv, &new_chs); if (err) return err; - - err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context); + err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context); if (err) - goto err_close; - - return 0; - -err_close: - mlx5e_close_channels(new_chs); + mlx5e_close_channels(&new_chs); return err; } int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) { - struct mlx5e_channels new_channels = {}; - - new_channels.params = priv->channels.params; - return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true); } void mlx5e_timestamp_init(struct mlx5e_priv *priv) @@ -3360,7 +3394,7 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, struct tc_mqprio_qopt *mqprio) { - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; u8 tc = mqprio->num_tc; int err = 0; @@ -3379,23 +3413,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, goto out; } - new_channels.params = priv->channels.params; - new_channels.params.num_tc = tc ? tc : 1; + new_params = priv->channels.params; + new_params.num_tc = tc ? tc : 1; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - struct mlx5e_params old_params; - - old_params = priv->channels.params; - priv->channels.params = new_channels.params; - err = mlx5e_num_channels_changed(priv); - if (err) - priv->channels.params = old_params; - - goto out; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_num_channels_changed_ctx, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); out: priv->max_opened_tc = max_t(u8, priv->max_opened_tc, @@ -3620,10 +3642,10 @@ static int set_feature_lro(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; struct mlx5e_params *cur_params; + struct mlx5e_params new_params; + bool reset = true; int err = 0; - bool reset; mutex_lock(&priv->state_lock); @@ -3641,30 +3663,17 @@ static int set_feature_lro(struct net_device *netdev, bool enable) goto out; } - reset = test_bit(MLX5E_STATE_OPENED, &priv->state); + new_params = *cur_params; + new_params.lro_en = enable; - new_channels.params = *cur_params; - new_channels.params.lro_en = enable; - - if (cur_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { + if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == - mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL)) + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL)) reset = false; } - if (!reset) { - struct mlx5e_params old_params; - - old_params = *cur_params; - *cur_params = new_channels.params; - err = mlx5e_modify_tirs_lro(priv); - if (err) - *cur_params = old_params; - goto out; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_modify_tirs_lro_ctx, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_modify_tirs_lro_ctx, NULL, reset); out: mutex_unlock(&priv->state_lock); return err; @@ -3893,26 +3902,23 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, mlx5e_fp_preactivate preactivate) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; struct mlx5e_params *params; + bool reset = true; int err = 0; - bool reset; mutex_lock(&priv->state_lock); params = &priv->channels.params; - reset = !params->lro_en; - reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); - - new_channels.params = *params; - new_channels.params.sw_mtu = new_mtu; - err = mlx5e_validate_params(priv->mdev, &new_channels.params); + new_params = *params; + new_params.sw_mtu = new_mtu; + err = mlx5e_validate_params(priv->mdev, &new_params); if (err) goto out; if (params->xdp_prog && - !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { + !mlx5e_rx_is_linear_skb(&new_params, NULL)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", new_mtu, mlx5e_xdp_max_mtu(params, NULL)); err = -EINVAL; @@ -3921,47 +3927,34 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (priv->xsk.refcnt && !mlx5e_xsk_validate_mtu(netdev, &priv->channels, - &new_channels.params, priv->mdev)) { + &new_params, priv->mdev)) { err = -EINVAL; goto out; } + if (params->lro_en) + reset = false; + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, - &new_channels.params, - NULL); + bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL); + bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, + &new_params, NULL); u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL); - u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL); + u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_params, NULL); - /* If XSK is active, XSK RQs are linear. */ - is_linear |= priv->xsk.refcnt; - - /* Always reset in linear mode - hw_mtu is used in data path. */ - reset = reset && (is_linear || (ppw_old != ppw_new)); - } - - if (!reset) { - unsigned int old_mtu = params->sw_mtu; - - params->sw_mtu = new_mtu; - if (preactivate) { - err = preactivate(priv, NULL); - if (err) { - params->sw_mtu = old_mtu; - goto out; - } - } - netdev->mtu = params->sw_mtu; - goto out; + /* Always reset in linear mode - hw_mtu is used in data path. + * Check that the mode was non-linear and didn't change. + * If XSK is active, XSK RQs are linear. + */ + if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt && + ppw_old == ppw_new) + reset = false; } - err = mlx5e_safe_switch_channels(priv, &new_channels, preactivate, NULL); - if (err) - goto out; - - netdev->mtu = new_channels.params.sw_mtu; + err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset); out: + netdev->mtu = params->sw_mtu; mutex_unlock(&priv->state_lock); return err; } @@ -3980,7 +3973,7 @@ int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) { - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; struct hwtstamp_config config; bool rx_cqe_compress_def; int err; @@ -4002,13 +3995,13 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) } mutex_lock(&priv->state_lock); - new_channels.params = priv->channels.params; + new_params = priv->channels.params; rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: - new_channels.params.ptp_rx = false; + new_params.ptp_rx = false; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -4025,7 +4018,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - new_channels.params.ptp_rx = rx_cqe_compress_def; + new_params.ptp_rx = rx_cqe_compress_def; config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: @@ -4033,15 +4026,11 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) return -ERANGE; } - if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx) + if (new_params.ptp_rx == priv->channels.params.ptp_rx) goto out; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - goto out; - } - err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx, - &new_channels.params.ptp_rx); + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, + &new_params.ptp_rx, true); if (err) { mutex_unlock(&priv->state_lock); return err; @@ -4359,7 +4348,7 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) { struct net_device *netdev = priv->netdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; if (priv->channels.params.lro_en) { netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); @@ -4372,16 +4361,16 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return -EINVAL; } - new_channels.params = priv->channels.params; - new_channels.params.xdp_prog = prog; + new_params = priv->channels.params; + new_params.xdp_prog = prog; /* No XSK params: AF_XDP can't be enabled yet at the point of setting * the XDP program. */ - if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { + if (!mlx5e_rx_is_linear_skb(&new_params, NULL)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", - new_channels.params.sw_mtu, - mlx5e_xdp_max_mtu(&new_channels.params, NULL)); + new_params.sw_mtu, + mlx5e_xdp_max_mtu(&new_params, NULL)); return -EINVAL; } @@ -4401,9 +4390,10 @@ static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params new_params; struct bpf_prog *old_prog; - bool reset, was_opened; int err = 0; + bool reset; int i; mutex_lock(&priv->state_lock); @@ -4414,46 +4404,29 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) goto unlock; } - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - if (was_opened && !reset) - /* num_channels is invariant here, so we can take the - * batched reference right upfront. - */ - bpf_prog_add(prog, priv->channels.num); - - if (was_opened && reset) { - struct mlx5e_channels new_channels = {}; - - new_channels.params = priv->channels.params; - new_channels.params.xdp_prog = prog; - mlx5e_set_rq_type(priv->mdev, &new_channels.params); - old_prog = priv->channels.params.xdp_prog; + new_params = priv->channels.params; + new_params.xdp_prog = prog; + if (reset) + mlx5e_set_rq_type(priv->mdev, &new_params); + old_prog = priv->channels.params.xdp_prog; - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); - if (err) - goto unlock; - } else { - /* exchange programs, extra prog reference we got from caller - * as long as we don't fail from this point onwards. - */ - old_prog = xchg(&priv->channels.params.xdp_prog, prog); - } + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); + if (err) + goto unlock; if (old_prog) bpf_prog_put(old_prog); - if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */ - mlx5e_set_rq_type(priv->mdev, &priv->channels.params); - - if (!was_opened || reset) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) goto unlock; /* exchanging programs w/o reset, we update ref counts on behalf * of the channels RQs here. */ + bpf_prog_add(prog, priv->channels.num); for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; @@ -4916,6 +4889,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct devlink_port *dl_port; int err; mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); @@ -4931,14 +4905,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); - mlx5e_health_create_reporters(priv); + dl_port = mlx5e_devlink_get_dl_port(priv); + if (dl_port->registered) + mlx5e_health_create_reporters(priv); return 0; } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { - mlx5e_health_destroy_reporters(priv); + struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + + if (dl_port->registered) + mlx5e_health_destroy_reporters(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 9ef8e4a671a7..34eb1118670f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -52,7 +52,7 @@ #include "diag/en_rep_tracepoint.h" #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ - max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) + max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) #define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -335,8 +335,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, } /* Add re-inject rule to the PF/representor sqs */ - flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, - rep->vport, + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -973,8 +972,9 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5e_rep_tc_enable(priv); - mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, - 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); + if (MLX5_CAP_GEN(mdev, uplink_follow)) + mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, + 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); mlx5_lag_add(mdev, netdev); priv->events_nb.notifier_call = uplink_rep_async_event; mlx5_notifier_register(mdev, &priv->events_nb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index f67e51d8291a..e4f5b6395148 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -116,7 +116,6 @@ static const struct counter_desc sw_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, @@ -180,13 +179,12 @@ static const struct counter_desc sw_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_ctx) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_del) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_pkt) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_start) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_skip) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_ok) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_retry) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_skip) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_err) }, #endif @@ -342,13 +340,12 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, #ifdef CONFIG_MLX5_EN_TLS s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; - s->rx_tls_ctx += rq_stats->tls_ctx; - s->rx_tls_del += rq_stats->tls_del; s->rx_tls_resync_req_pkt += rq_stats->tls_resync_req_pkt; s->rx_tls_resync_req_start += rq_stats->tls_resync_req_start; s->rx_tls_resync_req_end += rq_stats->tls_resync_req_end; s->rx_tls_resync_req_skip += rq_stats->tls_resync_req_skip; s->rx_tls_resync_res_ok += rq_stats->tls_resync_res_ok; + s->rx_tls_resync_res_retry += rq_stats->tls_resync_res_retry; s->rx_tls_resync_res_skip += rq_stats->tls_resync_res_skip; s->rx_tls_err += rq_stats->tls_err; #endif @@ -390,7 +387,6 @@ static void mlx5e_stats_grp_sw_update_stats_sq(struct mlx5e_sw_stats *s, #ifdef CONFIG_MLX5_EN_TLS s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; - s->tx_tls_ctx += sq_stats->tls_ctx; s->tx_tls_ooo += sq_stats->tls_ooo; s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; s->tx_tls_dump_packets += sq_stats->tls_dump_packets; @@ -774,35 +770,112 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } -#define MLX5E_READ_CTR64_BE_F(ptr, c) \ +#define MLX5E_READ_CTR64_BE_F(ptr, set, c) \ be64_to_cpu(*(__be64 *)((char *)ptr + \ MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_802_3_cntrs_grp_data_layout.c##_high))) + counter_set.set.c##_high))) -void mlx5e_stats_pause_get(struct mlx5e_priv *priv, - struct ethtool_pause_stats *pause_stats) +static int mlx5e_stats_get_ieee(struct mlx5_core_dev *mdev, + u32 *ppcnt_ieee_802_3) { - u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; - struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) - return; + return -EOPNOTSUPP; MLX5_SET(ppcnt_reg, in, local_port, 1); MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3, - sz, MLX5_REG_PPCNT, 0, 0); + return mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3, + sz, MLX5_REG_PPCNT, 0, 0); +} + +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; pause_stats->tx_pause_frames = MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, a_pause_mac_ctrl_frames_transmitted); pause_stats->rx_pause_frames = MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, a_pause_mac_ctrl_frames_received); } +void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, + struct ethtool_eth_phy_stats *phy_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + phy_stats->SymbolErrorDuringCarrier = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_symbol_error_during_carrier); +} + +void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv, + struct ethtool_eth_mac_stats *mac_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + +#define RD(name) \ + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, \ + eth_802_3_cntrs_grp_data_layout, \ + name) + + mac_stats->FramesTransmittedOK = RD(a_frames_transmitted_ok); + mac_stats->FramesReceivedOK = RD(a_frames_received_ok); + mac_stats->FrameCheckSequenceErrors = RD(a_frame_check_sequence_errors); + mac_stats->OctetsTransmittedOK = RD(a_octets_transmitted_ok); + mac_stats->OctetsReceivedOK = RD(a_octets_received_ok); + mac_stats->MulticastFramesXmittedOK = RD(a_multicast_frames_xmitted_ok); + mac_stats->BroadcastFramesXmittedOK = RD(a_broadcast_frames_xmitted_ok); + mac_stats->MulticastFramesReceivedOK = RD(a_multicast_frames_received_ok); + mac_stats->BroadcastFramesReceivedOK = RD(a_broadcast_frames_received_ok); + mac_stats->InRangeLengthErrors = RD(a_in_range_length_errors); + mac_stats->OutOfRangeLengthField = RD(a_out_of_range_length_field); + mac_stats->FrameTooLongErrors = RD(a_frame_too_long_errors); +#undef RD +} + +void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + ctrl_stats->MACControlFramesTransmitted = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_mac_control_frames_transmitted); + ctrl_stats->MACControlFramesReceived = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_mac_control_frames_received); + ctrl_stats->UnsupportedOpcodesReceived = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_unsupported_opcodes_received); +} + #define PPORT_2863_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_2863_cntrs_grp_data_layout.c##_high) @@ -914,6 +987,59 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } +static const struct ethtool_rmon_hist_range mlx5e_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 8191 }, + { 8192, 10239 }, + {} +}; + +void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges) +{ + u32 ppcnt_RFC_2819_counters[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, ppcnt_RFC_2819_counters, + sz, MLX5_REG_PPCNT, 0, 0)) + return; + +#define RD(name) \ + MLX5E_READ_CTR64_BE_F(ppcnt_RFC_2819_counters, \ + eth_2819_cntrs_grp_data_layout, \ + name) + + rmon->undersize_pkts = RD(ether_stats_undersize_pkts); + rmon->fragments = RD(ether_stats_fragments); + rmon->jabbers = RD(ether_stats_jabbers); + + rmon->hist[0] = RD(ether_stats_pkts64octets); + rmon->hist[1] = RD(ether_stats_pkts65to127octets); + rmon->hist[2] = RD(ether_stats_pkts128to255octets); + rmon->hist[3] = RD(ether_stats_pkts256to511octets); + rmon->hist[4] = RD(ether_stats_pkts512to1023octets); + rmon->hist[5] = RD(ether_stats_pkts1024to1518octets); + rmon->hist[6] = RD(ether_stats_pkts1519to2047octets); + rmon->hist[7] = RD(ether_stats_pkts2048to4095octets); + rmon->hist[8] = RD(ether_stats_pkts4096to8191octets); + rmon->hist[9] = RD(ether_stats_pkts8192to10239octets); +#undef RD + + *ranges = mlx5e_rmon_ranges; +} + #define PPORT_PHY_STATISTICAL_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.phys_layer_statistical_cntrs.c##_high) @@ -1021,6 +1147,29 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical, + sz, MLX5_REG_PPCNT, 0, 0)) + return; + + fec_stats->corrected_bits.total = + MLX5E_READ_CTR64_BE_F(ppcnt_phy_statistical, + phys_layer_statistical_cntrs, + phy_corrected_bits); +} + #define PPORT_ETH_EXT_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_extended_cntrs_grp_data_layout.c##_high) @@ -1630,13 +1779,12 @@ static const struct counter_desc rq_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_ctx) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_del) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_pkt) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_start) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_skip) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_ok) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_retry) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_skip) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_err) }, #endif @@ -1658,7 +1806,6 @@ static const struct counter_desc sq_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, @@ -1816,7 +1963,6 @@ static const struct counter_desc qos_sq_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, - { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index ca398eac09c1..139e59f30db0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -114,6 +114,18 @@ void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_stats_pause_get(struct mlx5e_priv *priv, struct ethtool_pause_stats *pause_stats); +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats); + +void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, + struct ethtool_eth_phy_stats *phy_stats); +void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv, + struct ethtool_eth_mac_stats *mac_stats); +void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv, + struct ethtool_eth_ctrl_stats *ctrl_stats); +void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges); /* Concrete NIC Stats */ @@ -192,7 +204,6 @@ struct mlx5e_sw_stats { #ifdef CONFIG_MLX5_EN_TLS u64 tx_tls_encrypted_packets; u64 tx_tls_encrypted_bytes; - u64 tx_tls_ctx; u64 tx_tls_ooo; u64 tx_tls_dump_packets; u64 tx_tls_dump_bytes; @@ -203,13 +214,12 @@ struct mlx5e_sw_stats { u64 rx_tls_decrypted_packets; u64 rx_tls_decrypted_bytes; - u64 rx_tls_ctx; - u64 rx_tls_del; u64 rx_tls_resync_req_pkt; u64 rx_tls_resync_req_start; u64 rx_tls_resync_req_end; u64 rx_tls_resync_req_skip; u64 rx_tls_resync_res_ok; + u64 rx_tls_resync_res_retry; u64 rx_tls_resync_res_skip; u64 rx_tls_err; #endif @@ -335,13 +345,12 @@ struct mlx5e_rq_stats { #ifdef CONFIG_MLX5_EN_TLS u64 tls_decrypted_packets; u64 tls_decrypted_bytes; - u64 tls_ctx; - u64 tls_del; u64 tls_resync_req_pkt; u64 tls_resync_req_start; u64 tls_resync_req_end; u64 tls_resync_req_skip; u64 tls_resync_res_ok; + u64 tls_resync_res_retry; u64 tls_resync_res_skip; u64 tls_err; #endif @@ -365,7 +374,6 @@ struct mlx5e_sq_stats { #ifdef CONFIG_MLX5_EN_TLS u64 tls_encrypted_packets; u64 tls_encrypted_bytes; - u64 tls_ctx; u64 tls_ooo; u64 tls_dump_packets; u64 tls_dump_bytes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d157d1b9cad6..47a9c49b25fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1942,6 +1942,9 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, return 0; flow_rule_match_meta(rule, &match); + if (!match.mask->ingress_ifindex) + return 0; + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index d54da3797c30..833be29170a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -36,6 +36,7 @@ #include "en/xdp.h" #include "en/xsk/rx.h" #include "en/xsk/tx.h" +#include "en_accel/ktls_txrx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { @@ -171,6 +172,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) */ clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state); + /* Keep after async ICOSQ CQ poll */ + if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget))) + busy |= mlx5e_ktls_rx_handle_resync_list(c, budget); + busy |= INDIRECT_CALL_2(rq->post_wqes, mlx5e_post_rx_mpwqes, mlx5e_post_rx_wqes, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 174dfbc996c6..77c0ca655975 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -271,7 +271,7 @@ static void init_eq_buf(struct mlx5_eq *eq) struct mlx5_eqe *eqe; int i; - for (i = 0; i < eq->nent; i++) { + for (i = 0; i < eq_get_size(eq); i++) { eqe = get_eqe(eq, i); eqe->owner = MLX5_EQE_OWNER_INIT_VAL; } @@ -281,8 +281,10 @@ static int create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { + u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE); struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; + u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); struct mlx5_priv *priv = &dev->priv; u8 vecidx = param->irq_index; __be64 *pas; @@ -297,16 +299,18 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); - eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; - err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); + + err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride), + &eq->frag_buf, dev->priv.numa_node); if (err) return err; + mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + - MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages; + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) { @@ -315,7 +319,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, } pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); - mlx5_fill_page_array(&eq->buf, pas); + mlx5_fill_page_frag_array(&eq->frag_buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) @@ -326,11 +330,11 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, param->mask[i]); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); - MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); + MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz); MLX5_SET(eqc, eqc, uar_page, priv->uar->index); MLX5_SET(eqc, eqc, intr, vecidx); MLX5_SET(eqc, eqc, log_page_size, - eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) @@ -356,7 +360,7 @@ err_in: kvfree(in); err_buf: - mlx5_buf_free(dev, &eq->buf); + mlx5_frag_buf_free(dev, &eq->frag_buf); return err; } @@ -413,7 +417,7 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) eq->eqn); synchronize_irq(eq->irqn); - mlx5_buf_free(dev, &eq->buf); + mlx5_frag_buf_free(dev, &eq->frag_buf); return err; } @@ -764,10 +768,11 @@ EXPORT_SYMBOL(mlx5_eq_destroy_generic); struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) { u32 ci = eq->cons_index + cc; + u32 nent = eq_get_size(eq); struct mlx5_eqe *eqe; - eqe = get_eqe(eq, ci & (eq->nent - 1)); - eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe; + eqe = get_eqe(eq, ci & (nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe; /* Make sure we read EQ entry contents after we've * checked the ownership bit. */ @@ -931,13 +936,24 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) mutex_unlock(&table->lock); } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +#define MLX5_MAX_ASYNC_EQS 4 +#else +#define MLX5_MAX_ASYNC_EQS 3 +#endif + int mlx5_eq_table_create(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); int err; eq_table->num_comp_eqs = - mlx5_irq_get_num_comp(eq_table->irq_table); + min_t(int, + mlx5_irq_get_num_comp(eq_table->irq_table), + num_eqs - MLX5_MAX_ASYNC_EQS); err = create_async_eqs(dev); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c new file mode 100644 index 000000000000..8ab1224653a4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -0,0 +1,509 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "esw/acl/lgcy.h" +#include "esw/legacy.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "fs_core.h" + +enum { + LEGACY_VEPA_PRIO = 0, + LEGACY_FDB_PRIO, +}; + +static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + int err; + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* num FTE 2, num FG 2 */ + ft_attr.prio = LEGACY_VEPA_PRIO; + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; + fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); + return err; + } + esw->fdb_table.legacy.vepa_fdb = fdb; + + return 0; +} + +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy FDB Table\n"); + if (!esw->fdb_table.legacy.fdb) + return; + + if (esw->fdb_table.legacy.promisc_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + if (esw->fdb_table.legacy.allmulti_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + if (esw->fdb_table.legacy.addr_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); + + esw->fdb_table.legacy.fdb = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; + atomic64_set(&esw->user_count, 0); +} + +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *g; + void *match_criteria; + int table_size; + u32 *flow_group_in; + u8 *dmac; + int err = 0; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + ft_attr.max_fte = table_size; + ft_attr.prio = LEGACY_FDB_PRIO; + fdb = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto out; + } + esw->fdb_table.legacy.fdb = fdb; + + /* Addresses group : Full match unicast/multicast addresses */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + /* Preserve 2 entries for allmulti and promisc rules*/ + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); + eth_broadcast_addr(dmac); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.addr_grp = g; + + /* Allmulti group : One rule that forwards any mcast traffic */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2); + eth_zero_addr(dmac); + dmac[0] = 0x01; + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.allmulti_grp = g; + + /* Promiscuous group : + * One rule that forward all unmatched traffic from previous groups + */ + eth_zero_addr(dmac); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.promisc_grp = g; + +out: + if (err) + esw_destroy_legacy_fdb_table(esw); + + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy VEPA Table\n"); + if (!esw->fdb_table.legacy.vepa_fdb) + return; + + mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb); + esw->fdb_table.legacy.vepa_fdb = NULL; +} + +static int esw_create_legacy_table(struct mlx5_eswitch *esw) +{ + int err; + + memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); + atomic64_set(&esw->user_count, 0); + + err = esw_create_legacy_vepa_table(esw); + if (err) + return err; + + err = esw_create_legacy_fdb_table(esw); + if (err) + esw_destroy_legacy_vepa_table(esw); + + return err; +} + +static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.legacy.vepa_uplink_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule); + + if (esw->fdb_table.legacy.vepa_star_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule); + + esw->fdb_table.legacy.vepa_uplink_rule = NULL; + esw->fdb_table.legacy.vepa_star_rule = NULL; +} + +static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) +{ + esw_cleanup_vepa_rules(esw); + esw_destroy_legacy_fdb_table(esw); + esw_destroy_legacy_vepa_table(esw); +} + +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +int esw_legacy_enable(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int ret, i; + + ret = esw_create_legacy_table(esw); + if (ret) + return ret; + + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); + if (ret) + esw_destroy_legacy_table(esw); + return ret; +} + +void esw_legacy_disable(struct mlx5_eswitch *esw) +{ + struct esw_mc_addr *mc_promisc; + + mlx5_eswitch_disable_pf_vf_vports(esw); + + mc_promisc = &esw->mc_promisc; + if (mc_promisc->uplink_rule) + mlx5_del_flow_rules(mc_promisc->uplink_rule); + + esw_destroy_legacy_table(esw); +} + +static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, + u8 setting) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + int err = 0; + void *misc; + + if (!setting) { + esw_cleanup_vepa_rules(esw); + return 0; + } + + if (esw->fdb_table.legacy.vepa_uplink_rule) + return 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Uplink rule forward uplink traffic to FDB */ + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->fdb_table.legacy.fdb; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; + } + + /* Star rule to forward all traffic to uplink vport */ + memset(&dest, 0, sizeof(dest)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = MLX5_VPORT_UPLINK; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_star_rule = flow_rule; + } + +out: + kvfree(spec); + if (err) + esw_cleanup_vepa_rules(esw); + return err; +} + +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) +{ + int err = 0; + + if (!esw) + return -EOPNOTSUPP; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto out; + } + + err = _mlx5_eswitch_set_vepa_locked(esw, setting); + +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) +{ + if (!esw) + return -EOPNOTSUPP; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + if (esw->mode != MLX5_ESWITCH_LEGACY) + return -EOPNOTSUPP; + + *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0; + return 0; +} + +int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int ret; + + /* Only non manager vports need ACL in legacy mode */ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return 0; + + ret = esw_acl_ingress_lgcy_setup(esw, vport); + if (ret) + goto ingress_err; + + ret = esw_acl_egress_lgcy_setup(esw, vport); + if (ret) + goto egress_err; + + return 0; + +egress_err: + esw_acl_ingress_lgcy_cleanup(esw, vport); +ingress_err: + return ret; +} + +void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return; + + esw_acl_egress_lgcy_cleanup(esw, vport); + esw_acl_ingress_lgcy_cleanup(esw, vport); +} + +int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev, + struct mlx5_vport *vport, + struct mlx5_vport_drop_stats *stats) +{ + u64 rx_discard_vport_down, tx_discard_vport_down; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u64 bytes = 0; + int err = 0; + + if (esw->mode != MLX5_ESWITCH_LEGACY) + return 0; + + mutex_lock(&esw->state_lock); + if (!vport->enabled) + goto unlock; + + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) + mlx5_fc_query(dev, vport->egress.legacy.drop_counter, + &stats->rx_dropped, &bytes); + + if (vport->ingress.legacy.drop_counter) + mlx5_fc_query(dev, vport->ingress.legacy.drop_counter, + &stats->tx_dropped, &bytes); + + if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && + !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + goto unlock; + + err = mlx5_query_vport_down_stats(dev, vport->vport, 1, + &rx_discard_vport_down, + &tx_discard_vport_down); + if (err) + goto unlock; + + if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) + stats->rx_dropped += rx_discard_vport_down; + if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + stats->tx_dropped += tx_discard_vport_down; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + if (!vlan) + goto unlock; /* compatibility with libvirt */ + + err = -EOPNOTSUPP; + goto unlock; + } + + err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + u16 vport, bool spoofchk) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + bool pschk; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + pschk = evport->info.spoofchk; + evport->info.spoofchk = spoofchk; + if (pschk && !is_valid_ether_addr(evport->info.mac)) + mlx5_core_warn(esw->dev, + "Spoofchk in set while MAC is invalid, vport(%d)\n", + evport->vport); + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) + err = esw_acl_ingress_lgcy_setup(esw, evport); + if (err) + evport->info.spoofchk = pschk; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + u16 vport, bool setting) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + evport->info.trusted = setting; + if (evport->enabled) + esw_vport_change_handle_locked(evport); + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h new file mode 100644 index 000000000000..e0820bb72b57 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#ifndef __MLX5_ESW_LEGACY_H__ +#define __MLX5_ESW_LEGACY_H__ + +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +struct mlx5_eswitch; + +int esw_legacy_enable(struct mlx5_eswitch *esw); +void esw_legacy_disable(struct mlx5_eswitch *esw); + +int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev, + struct mlx5_vport *vport, + struct mlx5_vport_drop_stats *stats); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6cf04a366f99..1bb229ecd43b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -36,6 +36,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "esw/acl/lgcy.h" +#include "esw/legacy.h" #include "mlx5_core.h" #include "lib/eq.h" #include "eswitch.h" @@ -61,9 +62,6 @@ struct vport_addr { bool mc_promisc; }; -static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw); -static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw); - static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) { if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -278,226 +276,6 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); } -enum { - LEGACY_VEPA_PRIO = 0, - LEGACY_FDB_PRIO, -}; - -static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_table *fdb; - int err; - - root_ns = mlx5_get_fdb_sub_ns(dev, 0); - if (!root_ns) { - esw_warn(dev, "Failed to get FDB flow namespace\n"); - return -EOPNOTSUPP; - } - - /* num FTE 2, num FG 2 */ - ft_attr.prio = LEGACY_VEPA_PRIO; - ft_attr.max_fte = 2; - ft_attr.autogroup.max_num_groups = 2; - fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); - return err; - } - esw->fdb_table.legacy.vepa_fdb = fdb; - - return 0; -} - -static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_table *fdb; - struct mlx5_flow_group *g; - void *match_criteria; - int table_size; - u32 *flow_group_in; - u8 *dmac; - int err = 0; - - esw_debug(dev, "Create FDB log_max_size(%d)\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - - root_ns = mlx5_get_fdb_sub_ns(dev, 0); - if (!root_ns) { - esw_warn(dev, "Failed to get FDB flow namespace\n"); - return -EOPNOTSUPP; - } - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) - return -ENOMEM; - - table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - ft_attr.max_fte = table_size; - ft_attr.prio = LEGACY_FDB_PRIO; - fdb = mlx5_create_flow_table(root_ns, &ft_attr); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create FDB Table err %d\n", err); - goto out; - } - esw->fdb_table.legacy.fdb = fdb; - - /* Addresses group : Full match unicast/multicast addresses */ - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_OUTER_HEADERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - /* Preserve 2 entries for allmulti and promisc rules*/ - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); - eth_broadcast_addr(dmac); - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create flow group err(%d)\n", err); - goto out; - } - esw->fdb_table.legacy.addr_grp = g; - - /* Allmulti group : One rule that forwards any mcast traffic */ - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_OUTER_HEADERS); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2); - eth_zero_addr(dmac); - dmac[0] = 0x01; - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); - goto out; - } - esw->fdb_table.legacy.allmulti_grp = g; - - /* Promiscuous group : - * One rule that forward all unmatched traffic from previous groups - */ - eth_zero_addr(dmac); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); - goto out; - } - esw->fdb_table.legacy.promisc_grp = g; - -out: - if (err) - esw_destroy_legacy_fdb_table(esw); - - kvfree(flow_group_in); - return err; -} - -static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw) -{ - esw_debug(esw->dev, "Destroy VEPA Table\n"); - if (!esw->fdb_table.legacy.vepa_fdb) - return; - - mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb); - esw->fdb_table.legacy.vepa_fdb = NULL; -} - -static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) -{ - esw_debug(esw->dev, "Destroy FDB Table\n"); - if (!esw->fdb_table.legacy.fdb) - return; - - if (esw->fdb_table.legacy.promisc_grp) - mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); - if (esw->fdb_table.legacy.allmulti_grp) - mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); - if (esw->fdb_table.legacy.addr_grp) - mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); - mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); - - esw->fdb_table.legacy.fdb = NULL; - esw->fdb_table.legacy.addr_grp = NULL; - esw->fdb_table.legacy.allmulti_grp = NULL; - esw->fdb_table.legacy.promisc_grp = NULL; - atomic64_set(&esw->user_count, 0); -} - -static int esw_create_legacy_table(struct mlx5_eswitch *esw) -{ - int err; - - memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); - atomic64_set(&esw->user_count, 0); - - err = esw_create_legacy_vepa_table(esw); - if (err) - return err; - - err = esw_create_legacy_fdb_table(esw); - if (err) - esw_destroy_legacy_vepa_table(esw); - - return err; -} - -static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) -{ - esw_cleanup_vepa_rules(esw); - esw_destroy_legacy_fdb_table(esw); - esw_destroy_legacy_vepa_table(esw); -} - -#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ - MLX5_VPORT_MC_ADDR_CHANGE | \ - MLX5_VPORT_PROMISC_CHANGE) - -static int esw_legacy_enable(struct mlx5_eswitch *esw) -{ - struct mlx5_vport *vport; - int ret, i; - - ret = esw_create_legacy_table(esw); - if (ret) - return ret; - - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) - vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; - - ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); - if (ret) - esw_destroy_legacy_table(esw); - return ret; -} - -static void esw_legacy_disable(struct mlx5_eswitch *esw) -{ - struct esw_mc_addr *mc_promisc; - - mlx5_eswitch_disable_pf_vf_vports(esw); - - mc_promisc = &esw->mc_promisc; - if (mc_promisc->uplink_rule) - mlx5_del_flow_rules(mc_promisc->uplink_rule); - - esw_destroy_legacy_table(esw); -} - /* E-Switch vport UC/MC lists management */ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, struct vport_addr *vaddr); @@ -919,7 +697,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, (promisc_all || promisc_mc)); } -static void esw_vport_change_handle_locked(struct mlx5_vport *vport) +void esw_vport_change_handle_locked(struct mlx5_vport *vport) { struct mlx5_core_dev *dev = vport->dev; struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -1170,56 +948,20 @@ static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac) ((u8 *)node_guid)[0] = mac[5]; } -static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - int ret; - - /* Only non manager vports need ACL in legacy mode */ - if (mlx5_esw_is_manager_vport(esw, vport->vport)) - return 0; - - ret = esw_acl_ingress_lgcy_setup(esw, vport); - if (ret) - goto ingress_err; - - ret = esw_acl_egress_lgcy_setup(esw, vport); - if (ret) - goto egress_err; - - return 0; - -egress_err: - esw_acl_ingress_lgcy_cleanup(esw, vport); -ingress_err: - return ret; -} - static int esw_vport_setup_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { if (esw->mode == MLX5_ESWITCH_LEGACY) - return esw_vport_create_legacy_acl_tables(esw, vport); + return esw_legacy_vport_acl_setup(esw, vport); else return esw_vport_create_offloads_acl_tables(esw, vport); } -static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) - -{ - if (mlx5_esw_is_manager_vport(esw, vport->vport)) - return; - - esw_acl_egress_lgcy_cleanup(esw, vport); - esw_acl_ingress_lgcy_cleanup(esw, vport); -} - static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { if (esw->mode == MLX5_ESWITCH_LEGACY) - esw_vport_destroy_legacy_acl_tables(esw, vport); + esw_legacy_vport_acl_cleanup(esw, vport); else esw_vport_destroy_offloads_acl_tables(esw, vport); } @@ -1390,15 +1132,9 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) { int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; - u16 max_sf_vports; u32 *out; int err; - max_sf_vports = mlx5_sf_max_functions(dev); - /* Device interface is array of 64-bits */ - if (max_sf_vports) - outlen += DIV_ROUND_UP(max_sf_vports, BITS_PER_TYPE(__be64)) * sizeof(__be64); - out = kvzalloc(outlen, GFP_KERNEL); if (!out) return ERR_PTR(-ENOMEM); @@ -1449,8 +1185,6 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) } /* Public E-Switch API */ -#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) - int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, enum mlx5_eswitch_vport_event enabled_events) { @@ -1633,6 +1367,47 @@ static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode) blocking_notifier_call_chain(&esw->n_head, 0, &info); } +static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + int total_vports; + int err; + + total_vports = mlx5_eswitch_get_total_vports(dev); + + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = mlx5_fs_egress_acls_init(dev, total_vports); + if (err) + return err; + } else { + esw_warn(dev, "engress ACL is not supported by FW\n"); + } + + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = mlx5_fs_ingress_acls_init(dev, total_vports); + if (err) + goto err; + } else { + esw_warn(dev, "ingress ACL is not supported by FW\n"); + } + return 0; + +err: + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + mlx5_fs_egress_acls_cleanup(dev); + return err; +} + +static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) + mlx5_fs_ingress_acls_cleanup(dev); + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + mlx5_fs_egress_acls_cleanup(dev); +} + /** * mlx5_eswitch_enable_locked - Enable eswitch * @esw: Pointer to eswitch @@ -1661,14 +1436,12 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) return -EOPNOTSUPP; } - if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "ingress ACL is not supported by FW\n"); - - if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "engress ACL is not supported by FW\n"); - mlx5_eswitch_get_devlink_param(esw); + err = mlx5_esw_acls_ns_init(esw); + if (err) + return err; + mlx5_eswitch_update_num_of_vfs(esw, num_vfs); esw_create_tsar(esw); @@ -1704,6 +1477,7 @@ abort: mlx5_rescan_drivers(esw->dev); esw_destroy_tsar(esw); + mlx5_esw_acls_ns_cleanup(esw); return err; } @@ -1719,7 +1493,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { int ret; - if (!ESW_ALLOWED(esw)) + if (!mlx5_esw_allowed(esw)) return 0; down_write(&esw->mode_lock); @@ -1772,6 +1546,7 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) mlx5_rescan_drivers(esw->dev); esw_destroy_tsar(esw); + mlx5_esw_acls_ns_cleanup(esw); if (clear_vf) mlx5_eswitch_clear_vf_vports_info(esw); @@ -1779,7 +1554,7 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) { - if (!ESW_ALLOWED(esw)) + if (!mlx5_esw_allowed(esw)) return; down_write(&esw->mode_lock); @@ -1862,7 +1637,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); - esw_offloads_cleanup_reps(esw); kfree(esw->vports); kfree(esw); return err; @@ -1877,7 +1651,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); - esw_offloads_cleanup_reps(esw); mutex_destroy(&esw->state_lock); WARN_ON(!xa_empty(&esw->offloads.vhca_map)); xa_destroy(&esw->offloads.vhca_map); @@ -1885,6 +1658,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr); mutex_destroy(&esw->offloads.encap_tbl_lock); mutex_destroy(&esw->offloads.decap_tbl_lock); + esw_offloads_cleanup_reps(esw); kfree(esw->vports); kfree(esw); } @@ -2030,7 +1804,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int other_vport = 1; int err = 0; - if (!ESW_ALLOWED(esw)) + if (!mlx5_esw_allowed(esw)) return -EPERM; if (IS_ERR(evport)) return PTR_ERR(evport); @@ -2112,205 +1886,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, return err; } -int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - u16 vport, u16 vlan, u8 qos) -{ - u8 set_flags = 0; - int err = 0; - - if (!ESW_ALLOWED(esw)) - return -EPERM; - - if (vlan || qos) - set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; - - mutex_lock(&esw->state_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY) { - if (!vlan) - goto unlock; /* compatibility with libvirt */ - - err = -EOPNOTSUPP; - goto unlock; - } - - err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); - -unlock: - mutex_unlock(&esw->state_lock); - return err; -} - -int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, - u16 vport, bool spoofchk) -{ - struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); - bool pschk; - int err = 0; - - if (!ESW_ALLOWED(esw)) - return -EPERM; - if (IS_ERR(evport)) - return PTR_ERR(evport); - - mutex_lock(&esw->state_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY) { - err = -EOPNOTSUPP; - goto unlock; - } - pschk = evport->info.spoofchk; - evport->info.spoofchk = spoofchk; - if (pschk && !is_valid_ether_addr(evport->info.mac)) - mlx5_core_warn(esw->dev, - "Spoofchk in set while MAC is invalid, vport(%d)\n", - evport->vport); - if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) - err = esw_acl_ingress_lgcy_setup(esw, evport); - if (err) - evport->info.spoofchk = pschk; - -unlock: - mutex_unlock(&esw->state_lock); - return err; -} - -static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw) -{ - if (esw->fdb_table.legacy.vepa_uplink_rule) - mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule); - - if (esw->fdb_table.legacy.vepa_star_rule) - mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule); - - esw->fdb_table.legacy.vepa_uplink_rule = NULL; - esw->fdb_table.legacy.vepa_star_rule = NULL; -} - -static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, - u8 setting) -{ - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *flow_rule; - struct mlx5_flow_spec *spec; - int err = 0; - void *misc; - - if (!setting) { - esw_cleanup_vepa_rules(esw); - return 0; - } - - if (esw->fdb_table.legacy.vepa_uplink_rule) - return 0; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - /* Uplink rule forward uplink traffic to FDB */ - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = esw->fdb_table.legacy.fdb; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, - &flow_act, &dest, 1); - if (IS_ERR(flow_rule)) { - err = PTR_ERR(flow_rule); - goto out; - } else { - esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; - } - - /* Star rule to forward all traffic to uplink vport */ - memset(&dest, 0, sizeof(dest)); - dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = MLX5_VPORT_UPLINK; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL, - &flow_act, &dest, 1); - if (IS_ERR(flow_rule)) { - err = PTR_ERR(flow_rule); - goto out; - } else { - esw->fdb_table.legacy.vepa_star_rule = flow_rule; - } - -out: - kvfree(spec); - if (err) - esw_cleanup_vepa_rules(esw); - return err; -} - -int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) -{ - int err = 0; - - if (!esw) - return -EOPNOTSUPP; - - if (!ESW_ALLOWED(esw)) - return -EPERM; - - mutex_lock(&esw->state_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY) { - err = -EOPNOTSUPP; - goto out; - } - - err = _mlx5_eswitch_set_vepa_locked(esw, setting); - -out: - mutex_unlock(&esw->state_lock); - return err; -} - -int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) -{ - if (!esw) - return -EOPNOTSUPP; - - if (!ESW_ALLOWED(esw)) - return -EPERM; - - if (esw->mode != MLX5_ESWITCH_LEGACY) - return -EOPNOTSUPP; - - *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0; - return 0; -} - -int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, - u16 vport, bool setting) -{ - struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); - int err = 0; - - if (!ESW_ALLOWED(esw)) - return -EPERM; - if (IS_ERR(evport)) - return PTR_ERR(evport); - - mutex_lock(&esw->state_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY) { - err = -EOPNOTSUPP; - goto unlock; - } - evport->info.trusted = setting; - if (evport->enabled) - esw_vport_change_handle_locked(evport); - -unlock: - mutex_unlock(&esw->state_lock); - return err; -} - static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); @@ -2376,7 +1951,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, bool max_rate_supported; int err = 0; - if (!ESW_ALLOWED(esw)) + if (!mlx5_esw_allowed(esw)) return -EPERM; if (IS_ERR(evport)) return PTR_ERR(evport); @@ -2415,50 +1990,6 @@ unlock: return err; } -static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, - struct mlx5_vport *vport, - struct mlx5_vport_drop_stats *stats) -{ - struct mlx5_eswitch *esw = dev->priv.eswitch; - u64 rx_discard_vport_down, tx_discard_vport_down; - u64 bytes = 0; - int err = 0; - - if (esw->mode != MLX5_ESWITCH_LEGACY) - return 0; - - mutex_lock(&esw->state_lock); - if (!vport->enabled) - goto unlock; - - if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) - mlx5_fc_query(dev, vport->egress.legacy.drop_counter, - &stats->rx_dropped, &bytes); - - if (vport->ingress.legacy.drop_counter) - mlx5_fc_query(dev, vport->ingress.legacy.drop_counter, - &stats->tx_dropped, &bytes); - - if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && - !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) - goto unlock; - - err = mlx5_query_vport_down_stats(dev, vport->vport, 1, - &rx_discard_vport_down, - &tx_discard_vport_down); - if (err) - goto unlock; - - if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) - stats->rx_dropped += rx_discard_vport_down; - if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) - stats->tx_dropped += tx_discard_vport_down; - -unlock: - mutex_unlock(&esw->state_lock); - return err; -} - int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, u16 vport_num, struct ifla_vf_stats *vf_stats) @@ -2526,7 +2057,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, vf_stats->broadcast = MLX5_GET_CTR(out, received_eth_broadcast.packets); - err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats); + err = mlx5_esw_query_vport_drop_stats(esw->dev, vport, &stats); if (err) goto free_out; vf_stats->rx_dropped = stats.rx_dropped; @@ -2541,7 +2072,7 @@ u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; - return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE; + return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); @@ -2551,7 +2082,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) struct mlx5_eswitch *esw; esw = dev->priv.eswitch; - return ESW_ALLOWED(esw) ? esw->offloads.encap : + return mlx5_esw_allowed(esw) ? esw->offloads.encap : DEVLINK_ESWITCH_ENCAP_MODE_NONE; } EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); @@ -2597,7 +2128,7 @@ bool mlx5_esw_hold(struct mlx5_core_dev *mdev) struct mlx5_eswitch *esw = mdev->priv.eswitch; /* e.g. VF doesn't have eswitch so nothing to do */ - if (!ESW_ALLOWED(esw)) + if (!mlx5_esw_allowed(esw)) return true; if (down_read_trylock(&esw->mode_lock) != 0) @@ -2614,7 +2145,7 @@ void mlx5_esw_release(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - if (ESW_ALLOWED(esw)) + if (mlx5_esw_allowed(esw)) up_read(&esw->mode_lock); } @@ -2626,7 +2157,7 @@ void mlx5_esw_get(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - if (ESW_ALLOWED(esw)) + if (mlx5_esw_allowed(esw)) atomic64_inc(&esw->user_count); } @@ -2638,7 +2169,7 @@ void mlx5_esw_put(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - if (ESW_ALLOWED(esw)) + if (mlx5_esw_allowed(esw)) atomic64_dec_if_positive(&esw->user_count); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index deafb0e03787..b289d756a7e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -152,7 +152,6 @@ enum mlx5_eswitch_vport_event { struct mlx5_vport { struct mlx5_core_dev *dev; - int vport; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; struct mlx5_flow_handle *promisc_rule; @@ -174,6 +173,7 @@ struct mlx5_vport { u32 max_rate; } qos; + u16 vport; bool enabled; enum mlx5_eswitch_vport_event enabled_events; struct devlink_port *dl_port; @@ -314,6 +314,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); +int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); @@ -519,6 +521,11 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) +static inline bool mlx5_esw_allowed(const struct mlx5_eswitch *esw) +{ + return esw && MLX5_ESWITCH_MANAGER(esw->dev); +} + /* The returned number is valid only when the dev is eswitch manager. */ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) { @@ -807,6 +814,8 @@ void mlx5_esw_put(struct mlx5_core_dev *dev); int mlx5_esw_try_lock(struct mlx5_eswitch *esw); void mlx5_esw_unlock(struct mlx5_eswitch *esw); +void esw_vport_change_handle_locked(struct mlx5_vport *vport); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ac92ffc8a5d3..bbb707117296 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -400,6 +400,14 @@ esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act * return i; } +static bool +esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_GEN(esw->dev, reg_c_preserve) && + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level); +} + static int esw_setup_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@ -413,9 +421,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest, int err = 0; if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && - MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) && - mlx5_eswitch_vport_match_metadata_enabled(esw) && - MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) + esw_src_port_rewrite_supported(esw)) attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { @@ -909,7 +915,8 @@ out: } struct mlx5_flow_handle * -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch_rep *rep, u32 sqn) { struct mlx5_flow_act flow_act = {0}; @@ -927,21 +934,30 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); /* source vport is the esw manager */ - MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport); + MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport); + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(rep->esw->dev, vhca_id)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = vport; + dest.vport.num = rep->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) - esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); + esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n", + PTR_ERR(flow_rule)); out: kvfree(spec); return flow_rule; @@ -970,12 +986,13 @@ static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw) static int mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw) { - int num_vfs, vport_num, rule_idx = 0, err = 0; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {0}; + int num_vfs, rule_idx = 0, err = 0; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_handle **flows; struct mlx5_flow_spec *spec; + u16 vport_num; num_vfs = esw->esw_funcs.num_vfs; flows = kvzalloc(num_vfs * sizeof(*flows), GFP_KERNEL); @@ -1607,6 +1624,12 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); @@ -1620,36 +1643,40 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.send_to_vport_grp = g; - /* meta send to vport */ - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS_2); - - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + if (esw_src_port_rewrite_supported(esw)) { + /* meta send to vport */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); - MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); - MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - num_vfs = esw->esw_funcs.num_vfs; - if (num_vfs) { - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + num_vfs - 1); - ix += num_vfs; + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", - err); - goto send_vport_meta_err; + num_vfs = esw->esw_funcs.num_vfs; + if (num_vfs) { + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, + end_flow_index, ix + num_vfs - 1); + ix += num_vfs; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", + err); + goto send_vport_meta_err; + } + esw->fdb_table.offloads.send_to_vport_meta_grp = g; + + err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); + if (err) + goto meta_rule_err; } - esw->fdb_table.offloads.send_to_vport_meta_grp = g; - - err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); - if (err) - goto meta_rule_err; } if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { @@ -2199,13 +2226,8 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, struct mlx5_eswitch *peer_esw) { - int err; - err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); - if (err) - return err; - - return 0; + return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); } static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) @@ -2330,8 +2352,7 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); } -static bool -esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) { if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) return false; @@ -2431,6 +2452,28 @@ metadata_err: return err; } +int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable) +{ + int err = 0; + + down_write(&esw->mode_lock); + if (esw->mode != MLX5_ESWITCH_NONE) { + err = -EBUSY; + goto done; + } + if (!mlx5_esw_vport_match_metadata_supported(esw)) { + err = -EOPNOTSUPP; + goto done; + } + if (enable) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + else + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; +done: + up_write(&esw->mode_lock); + return err; +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) @@ -2652,9 +2695,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_metadata; - if (esw_check_vport_match_metadata_supported(esw)) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; - err = esw_offloads_metadata_init(esw); if (err) goto err_metadata; @@ -2704,7 +2744,6 @@ err_pool: err_vport_metadata: esw_offloads_metadata_uninit(esw); err_metadata: - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); return err; @@ -2740,7 +2779,6 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_steering_cleanup(esw); mapping_destroy(esw->offloads.reg_c0_obj_pool); esw_offloads_metadata_uninit(esw); - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; @@ -3083,6 +3121,7 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { if (likely(mlx5_eswitch_vport_has_rep(esw, i))) { + rep->esw = esw; rep_data = &rep->rep_data[rep_type]; atomic_set(&rep_data->state, REP_REGISTERED); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index ec679560a95d..a81ece94f599 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -83,14 +83,16 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, ft_attr.autogroup.max_num_groups = 1; tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(tt->termtbl)) { - esw_warn(dev, "Failed to create termination table\n"); + esw_warn(dev, "Failed to create termination table (error %d)\n", + IS_ERR(tt->termtbl)); return -EOPNOTSUPP; } tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, &tt->dest, 1); if (IS_ERR(tt->rule)) { - esw_warn(dev, "Failed to create termination table rule\n"); + esw_warn(dev, "Failed to create termination table rule (error %d)\n", + IS_ERR(tt->rule)); goto add_flow_err; } return 0; @@ -140,10 +142,9 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); - if (err) { - esw_warn(esw->dev, "Failed to create termination table\n"); + if (err) goto tt_create_err; - } + hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key); tt_add_ref: tt->ref_count++; @@ -282,7 +283,8 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, &dest[i], attr); if (IS_ERR(tt)) { - esw_warn(esw->dev, "Failed to create termination table\n"); + esw_warn(esw->dev, "Failed to get termination table (error %d)\n", + IS_ERR(tt)); goto revert_changes; } attr->dests[num_vport_dests].termtbl = tt; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index d43a05e77f67..0bba92cf5dc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -850,7 +850,7 @@ mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx) return; } - if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action & + if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) ida_free(&fipsec->halloc, sa_ctx->sa_handle); @@ -1085,6 +1085,7 @@ static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns, rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress); if (IS_ERR(rule->ctx)) { int err = PTR_ERR(rule->ctx); + kfree(rule); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0216bd63a42d..f74d2c834037 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2229,17 +2229,21 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d { struct mlx5_flow_steering *steering = dev->priv.steering; - if (!steering || vport >= mlx5_eswitch_get_total_vports(dev)) + if (!steering) return NULL; switch (type) { case MLX5_FLOW_NAMESPACE_ESW_EGRESS: + if (vport >= steering->esw_egress_acl_vports) + return NULL; if (steering->esw_egress_root_ns && steering->esw_egress_root_ns[vport]) return &steering->esw_egress_root_ns[vport]->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + if (vport >= steering->esw_ingress_acl_vports) + return NULL; if (steering->esw_ingress_root_ns && steering->esw_ingress_root_ns[vport]) return &steering->esw_ingress_root_ns[vport]->ns; @@ -2571,43 +2575,11 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) clean_tree(&root_ns->ns.node); } -static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev) -{ - struct mlx5_flow_steering *steering = dev->priv.steering; - int i; - - if (!steering->esw_egress_root_ns) - return; - - for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) - cleanup_root_ns(steering->esw_egress_root_ns[i]); - - kfree(steering->esw_egress_root_ns); - steering->esw_egress_root_ns = NULL; -} - -static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) -{ - struct mlx5_flow_steering *steering = dev->priv.steering; - int i; - - if (!steering->esw_ingress_root_ns) - return; - - for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) - cleanup_root_ns(steering->esw_ingress_root_ns[i]); - - kfree(steering->esw_ingress_root_ns); - steering->esw_ingress_root_ns = NULL; -} - void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; cleanup_root_ns(steering->root_ns); - cleanup_egress_acls_root_ns(dev); - cleanup_ingress_acls_root_ns(dev); cleanup_root_ns(steering->fdb_root_ns); steering->fdb_root_ns = NULL; kfree(steering->fdb_sub_ns); @@ -2852,10 +2824,9 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo return PTR_ERR_OR_ZERO(prio); } -static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) +int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports) { struct mlx5_flow_steering *steering = dev->priv.steering; - int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; @@ -2871,7 +2842,7 @@ static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) if (err) goto cleanup_root_ns; } - + steering->esw_egress_acl_vports = total_vports; return 0; cleanup_root_ns: @@ -2882,10 +2853,24 @@ cleanup_root_ns: return err; } -static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev) +void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_egress_root_ns) + return; + + for (i = 0; i < steering->esw_egress_acl_vports; i++) + cleanup_root_ns(steering->esw_egress_root_ns[i]); + + kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; +} + +int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports) { struct mlx5_flow_steering *steering = dev->priv.steering; - int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; @@ -2901,7 +2886,7 @@ static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev) if (err) goto cleanup_root_ns; } - + steering->esw_ingress_acl_vports = total_vports; return 0; cleanup_root_ns: @@ -2912,6 +2897,21 @@ cleanup_root_ns: return err; } +void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_ingress_root_ns) + return; + + for (i = 0; i < steering->esw_ingress_acl_vports; i++) + cleanup_root_ns(steering->esw_ingress_root_ns[i]); + + kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; +} + static int init_egress_root_ns(struct mlx5_flow_steering *steering) { int err; @@ -2974,16 +2974,6 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) if (err) goto err; } - if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { - err = init_egress_acls_root_ns(dev); - if (err) - goto err; - } - if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { - err = init_ingress_acls_root_ns(dev); - if (err) - goto err; - } } if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index b24a9849c45e..e577a2c424af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -129,6 +129,8 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace *rdma_rx_root_ns; struct mlx5_flow_root_namespace *rdma_tx_root_ns; struct mlx5_flow_root_namespace *egress_root_ns; + int esw_egress_acl_vports; + int esw_ingress_acl_vports; }; struct fs_node { @@ -287,6 +289,11 @@ int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); +int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports); +void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); +int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); +void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); + #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } #define fs_list_for_each_entry(pos, root) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index f43caefd07a1..18e5aec14641 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -497,13 +497,13 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc); bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1; - bulk = kzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc), - GFP_KERNEL); + bulk = kvzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc), + GFP_KERNEL); if (!bulk) goto err_alloc_bulk; - bulk->bitmask = kcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), - GFP_KERNEL); + bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), + GFP_KERNEL); if (!bulk->bitmask) goto err_alloc_bitmask; @@ -521,9 +521,9 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) return bulk; err_mlx5_cmd_bulk_alloc: - kfree(bulk->bitmask); + kvfree(bulk->bitmask); err_alloc_bitmask: - kfree(bulk); + kvfree(bulk); err_alloc_bulk: return ERR_PTR(err); } @@ -537,8 +537,8 @@ mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) } mlx5_cmd_fc_free(dev, bulk->base_id); - kfree(bulk->bitmask); - kfree(bulk); + kvfree(bulk->bitmask); + kvfree(bulk); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index b65b0cefc5b3..612a7f69366d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -481,28 +481,19 @@ static const struct mlx5e_profile mlx5i_nic_profile = { static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); - struct mlx5e_channels new_channels = {}; - struct mlx5e_params *params; + struct mlx5e_params new_params; int err = 0; mutex_lock(&priv->state_lock); - params = &priv->channels.params; + new_params = priv->channels.params; + new_params.sw_mtu = new_mtu; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - params->sw_mtu = new_mtu; - netdev->mtu = params->sw_mtu; - goto out; - } - - new_channels.params = *params; - new_channels.params.sw_mtu = new_mtu; - - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); if (err) goto out; - netdev->mtu = new_channels.params.sw_mtu; + netdev->mtu = new_params.sw_mtu; out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 127bb92da150..b8748390335f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -603,8 +603,6 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) if (err) mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", err); - - return; } /* Must be called with intf_mutex held */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 88e58ac902de..2c41a6920264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -35,7 +35,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) } /** - * Set lag port affinity + * mlx5_lag_set_port_affinity * * @ldev: lag device * @port: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 1e7f26b240de..ce696d523493 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -645,16 +645,19 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) return PTP_PF_NONE; } -static int mlx5_init_pin_config(struct mlx5_clock *clock) +static void mlx5_init_pin_config(struct mlx5_clock *clock) { int i; + if (!clock->ptp_info.n_pins) + return; + clock->ptp_info.pin_config = kcalloc(clock->ptp_info.n_pins, sizeof(*clock->ptp_info.pin_config), GFP_KERNEL); if (!clock->ptp_info.pin_config) - return -ENOMEM; + return; clock->ptp_info.enable = mlx5_ptp_enable; clock->ptp_info.verify = mlx5_ptp_verify; clock->ptp_info.pps = 1; @@ -667,8 +670,6 @@ static int mlx5_init_pin_config(struct mlx5_clock *clock) clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i); clock->ptp_info.pin_config[i].chan = 0; } - - return 0; } static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) @@ -859,6 +860,17 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) } } +static void mlx5_init_pps(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_PPS_CAP(mdev)) + return; + + mlx5_get_pps_caps(mdev); + mlx5_init_pin_config(clock); +} + void mlx5_init_clock(struct mlx5_core_dev *mdev) { struct mlx5_clock *clock = &mdev->clock; @@ -876,10 +888,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) clock->ptp_info = mlx5_ptp_clock_info; /* Initialize 1PPS data structures */ - if (MLX5_PPS_CAP(mdev)) - mlx5_get_pps_caps(mdev); - if (clock->ptp_info.n_pins) - mlx5_init_pin_config(clock); + mlx5_init_pps(mdev); clock->ptp = ptp_clock_register(&clock->ptp_info, &mdev->pdev->dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 81f2cc4ca1da..f607a3858ef5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -22,15 +22,15 @@ struct mlx5_cq_table { }; struct mlx5_eq { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; struct mlx5_core_dev *dev; struct mlx5_cq_table cq_table; __be32 __iomem *doorbell; u32 cons_index; - struct mlx5_frag_buf buf; unsigned int vecidx; unsigned int irqn; u8 eqn; - int nent; struct mlx5_rsc_debug *dbg; }; @@ -47,16 +47,21 @@ struct mlx5_eq_comp { struct list_head list; }; +static inline u32 eq_get_size(struct mlx5_eq *eq) +{ + return eq->fbc.sz_m1 + 1; +} + static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) { - return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); + return mlx5_frag_buf_get_wqe(&eq->fbc, entry); } static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) { - struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & eq->fbc.sz_m1); - return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; + return (eqe->owner ^ (eq->cons_index >> eq->fbc.log_sz)) & 1 ? NULL : eqe; } static inline void eq_update_ci(struct mlx5_eq *eq, int arm) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e3a417d17707..c114365eb126 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -571,6 +571,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) mlx5_vhca_state_cap_handle(dev, set_hca_cap); + if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)) + MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, + MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } @@ -1741,6 +1745,8 @@ static struct pci_driver mlx5_core_driver = { .shutdown = shutdown, .err_handler = &mlx5_err_handler, .sriov_configure = mlx5_core_sriov_configure, + .sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix, + .sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count, }; static void mlx5_core_verify_params(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 37c8ec7d2217..a22b706eebd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -140,6 +140,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, @@ -174,6 +175,11 @@ int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb); int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb); + +int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn, + int msix_vec_count); +int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs); + struct cpumask * mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); @@ -275,4 +281,10 @@ int mlx5_load_one(struct mlx5_core_dev *dev); int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out); void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); +static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + + return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); +} #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index a61e09aff152..1f907df5b3a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -61,6 +61,79 @@ static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) return &irq_table->irq[vecidx]; } +/** + * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors + * to be ssigned to each VF. + * @dev: PF to work on + * @num_vfs: Number of enabled VFs + */ +int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs) +{ + int num_vf_msix, min_msix, max_msix; + + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return 0; + + min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size); + max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size); + + /* Limit maximum number of MSI-X vectors so the default configuration + * has some available in the pool. This will allow the user to increase + * the number of vectors in a VF without having to first size-down other + * VFs. + */ + return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix); +} + +/** + * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF + * @dev: PF to work on + * @function_id: Internal PCI VF function IDd + * @msix_vec_count: Number of MSI-X vectors to set + */ +int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, + int msix_vec_count) +{ + int sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int num_vf_msix, min_msix, max_msix; + void *hca_cap, *cap; + int ret; + + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return 0; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev)) + return -EOPNOTSUPP; + + min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size); + max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size); + + if (msix_vec_count < min_msix) + return -EINVAL; + + if (msix_vec_count > max_msix) + return -EOVERFLOW; + + hca_cap = kzalloc(sz, GFP_KERNEL); + if (!hca_cap) + return -ENOMEM; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count); + + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id); + + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); + kfree(hca_cap); + return ret; +} + int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { @@ -94,7 +167,6 @@ static void irq_set_name(char *name, int vecidx) snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx - MLX5_IRQ_VEC_COMP_BASE); - return; } static int request_irqs(struct mlx5_core_dev *dev, int nvec) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 4bb219565c58..1ef2b6a848c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -353,69 +353,123 @@ static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset *offset -= MLX5_EEPROM_PAGE_LENGTH; } -int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, - u16 offset, u16 size, u8 *data) +static int mlx5_query_mcia(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, u8 *data) { - int module_num, status, err, page_num = 0; u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {}; u32 out[MLX5_ST_SZ_DW(mcia_reg)]; - u16 i2c_addr = 0; - u8 module_id; + int status, err; void *ptr; + u16 size; - err = mlx5_query_module_num(dev, &module_num); + size = min_t(int, params->size, MLX5_EEPROM_MAX_BYTES); + + MLX5_SET(mcia_reg, in, l, 0); + MLX5_SET(mcia_reg, in, size, size); + MLX5_SET(mcia_reg, in, module, params->module_number); + MLX5_SET(mcia_reg, in, device_address, params->offset); + MLX5_SET(mcia_reg, in, page_number, params->page); + MLX5_SET(mcia_reg, in, i2c_device_address, params->i2c_address); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); if (err) return err; - err = mlx5_query_module_id(dev, module_num, &module_id); + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + + ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + memcpy(data, ptr, size); + + return size; +} + +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data) +{ + struct mlx5_module_eeprom_query_params query = {0}; + u8 module_id; + int err; + + err = mlx5_query_module_num(dev, &query.module_number); + if (err) + return err; + + err = mlx5_query_module_id(dev, query.module_number, &module_id); if (err) return err; switch (module_id) { case MLX5_MODULE_ID_SFP: - mlx5_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); break; case MLX5_MODULE_ID_QSFP: case MLX5_MODULE_ID_QSFP_PLUS: case MLX5_MODULE_ID_QSFP28: - mlx5_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); break; default: mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); return -EINVAL; } - if (offset + size > MLX5_EEPROM_PAGE_LENGTH) + if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH) /* Cross pages read, read until offset 256 in low page */ size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; - size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); + query.size = size; - MLX5_SET(mcia_reg, in, l, 0); - MLX5_SET(mcia_reg, in, module, module_num); - MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr); - MLX5_SET(mcia_reg, in, page_number, page_num); - MLX5_SET(mcia_reg, in, device_address, offset); - MLX5_SET(mcia_reg, in, size, size); + return mlx5_query_mcia(dev, &query, data); +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_MCIA, 0, 0); +int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, + u8 *data) +{ + u8 module_id; + int err; + + err = mlx5_query_module_num(dev, ¶ms->module_number); if (err) return err; - status = MLX5_GET(mcia_reg, out, status); - if (status) { - mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", - status); - return -EIO; + err = mlx5_query_module_id(dev, params->module_number, &module_id); + if (err) + return err; + + switch (module_id) { + case MLX5_MODULE_ID_SFP: + if (params->page > 0) + return -EINVAL; + break; + case MLX5_MODULE_ID_QSFP: + case MLX5_MODULE_ID_QSFP28: + case MLX5_MODULE_ID_QSFP_PLUS: + if (params->page > 3) + return -EINVAL; + break; + case MLX5_MODULE_ID_DSFP: + break; + default: + mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); + return -EINVAL; } - ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); - memcpy(data, ptr, size); + if (params->i2c_address != MLX5_I2C_ADDR_HIGH && + params->i2c_address != MLX5_I2C_ADDR_LOW) { + mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address); + return -EINVAL; + } - return size; + return mlx5_query_mcia(dev, params, data); } -EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom_by_page); static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 8e0dddc6383f..441b5453acae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -180,5 +180,4 @@ del_roce_addr: mlx5_rdma_del_roce_addr(dev); disable_roce: mlx5_nic_vport_disable_roce(dev); - return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 60a6328a9ca0..52226d9b9a6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -270,15 +270,14 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, { struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_sf *sf; - u16 hw_fn_id; int err; sf = mlx5_sf_alloc(table, new_attr->sfnum, extack); if (IS_ERR(sf)) return PTR_ERR(sf); - hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sf->id); - err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, hw_fn_id, new_attr->sfnum); + err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id, + new_attr->sfnum); if (err) goto esw_err; *new_port_index = sf->port_index; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c index c9bddde04047..ec53c11c8344 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@ -67,8 +67,8 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum) goto exist_err; } - hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sw_id); - err = mlx5_cmd_alloc_sf(table->dev, hw_fn_id); + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sw_id); + err = mlx5_cmd_alloc_sf(dev, hw_fn_id); if (err) goto err; @@ -80,7 +80,7 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum) return sw_id; vhca_err: - mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); + mlx5_cmd_dealloc_sf(dev, hw_fn_id); err: table->sfs[i].allocated = false; exist_err: @@ -93,8 +93,8 @@ static void _mlx5_sf_hw_id_free(struct mlx5_core_dev *dev, u16 id) struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; u16 hw_fn_id; - hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, id); - mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id); + mlx5_cmd_dealloc_sf(dev, hw_fn_id); table->sfs[id].allocated = false; table->sfs[id].pending_delete = false; } @@ -123,7 +123,7 @@ void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id) goto err; state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); if (state == MLX5_VHCA_STATE_ALLOCATED) { - mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); + mlx5_cmd_dealloc_sf(dev, hw_fn_id); table->sfs[id].allocated = false; } else { table->sfs[id].pending_delete = true; @@ -216,7 +216,7 @@ int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev) return 0; table->vhca_nb.notifier_call = mlx5_sf_hw_vhca_event; - return mlx5_vhca_event_notifier_register(table->dev, &table->vhca_nb); + return mlx5_vhca_event_notifier_register(dev, &table->vhca_nb); } void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) @@ -226,7 +226,7 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) if (!table) return; - mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb); + mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb); /* Dealloc SFs whose firmware event has been missed. */ mlx5_sf_hw_dealloc_all(table); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 3094d20297a9..2338989d4403 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -71,8 +71,7 @@ static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; - int vf; + int err, vf, num_msix_count; if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; @@ -85,12 +84,22 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } enable_vfs_hca: + num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs); for (vf = 0; vf < num_vfs; vf++) { err = mlx5_core_enable_hca(dev, vf + 1); if (err) { mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); continue; } + + err = mlx5_set_msix_vec_count(dev, vf + 1, num_msix_count); + if (err) { + mlx5_core_warn(dev, + "failed to set MSI-X vector counts VF %d, err %d\n", + vf, err); + continue; + } + sriov->vfs_ctx[vf].enabled = 1; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { err = sriov_restore_guids(dev, vf); @@ -178,6 +187,41 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) return err ? err : num_vfs; } +int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count) +{ + struct pci_dev *pf = pci_physfn(vf); + struct mlx5_core_sriov *sriov; + struct mlx5_core_dev *dev; + int num_vf_msix, id; + + dev = pci_get_drvdata(pf); + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return -EOPNOTSUPP; + + if (!msix_vec_count) + msix_vec_count = + mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf)); + + sriov = &dev->priv.sriov; + + /* Reversed translation of PCI VF function number to the internal + * function_id, which exists in the name of virtfn symlink. + */ + for (id = 0; id < pci_num_vf(pf); id++) { + if (!sriov->vfs_ctx[id].enabled) + continue; + + if (vf->devfn == pci_iov_virtfn_devfn(pf, id)) + break; + } + + if (id == pci_num_vf(pf) || !sriov->vfs_ctx[id].enabled) + return -EINVAL; + + return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count); +} + int mlx5_sriov_attach(struct mlx5_core_dev *dev) { if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 28a7971cac6a..949879cf2092 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -313,8 +313,8 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, * table, since there is an *assumption* that in such case FW * will recalculate the CS. */ - if (dest_action->dest_tbl.is_fw_tbl) { - *final_icm_addr = dest_action->dest_tbl.fw_tbl.rx_icm_addr; + if (dest_action->dest_tbl->is_fw_tbl) { + *final_icm_addr = dest_action->dest_tbl->fw_tbl.rx_icm_addr; } else { mlx5dr_dbg(dmn, "Destination FT should be terminating when modify TTL is used\n"); @@ -326,8 +326,8 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, /* If destination is vport we will get the FW flow table * that recalculates the CS and forwards to the vport. */ - ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn, - dest_action->vport.caps->num, + ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport->dmn, + dest_action->vport->caps->num, final_icm_addr); if (ret) { mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n"); @@ -369,6 +369,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type); for (i = 0; i < num_actions; i++) { + struct mlx5dr_action_dest_tbl *dest_tbl; struct mlx5dr_action *action; int max_actions_type = 1; u32 action_type; @@ -382,37 +383,38 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, break; case DR_ACTION_TYP_FT: dest_action = action; - if (!action->dest_tbl.is_fw_tbl) { - if (action->dest_tbl.tbl->dmn != dmn) { + dest_tbl = action->dest_tbl; + if (!dest_tbl->is_fw_tbl) { + if (dest_tbl->tbl->dmn != dmn) { mlx5dr_err(dmn, "Destination table belongs to a different domain\n"); goto out_invalid_arg; } - if (action->dest_tbl.tbl->level <= matcher->tbl->level) { + if (dest_tbl->tbl->level <= matcher->tbl->level) { mlx5_core_warn_once(dmn->mdev, "Connecting table to a lower/same level destination table\n"); mlx5dr_dbg(dmn, "Connecting table at level %d to a destination table at level %d\n", matcher->tbl->level, - action->dest_tbl.tbl->level); + dest_tbl->tbl->level); } attr.final_icm_addr = rx_rule ? - action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : - action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr; + dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : + dest_tbl->tbl->tx.s_anchor->chunk->icm_addr; } else { struct mlx5dr_cmd_query_flow_table_details output; int ret; /* get the relevant addresses */ - if (!action->dest_tbl.fw_tbl.rx_icm_addr) { + if (!action->dest_tbl->fw_tbl.rx_icm_addr) { ret = mlx5dr_cmd_query_flow_table(dmn->mdev, - action->dest_tbl.fw_tbl.type, - action->dest_tbl.fw_tbl.id, + dest_tbl->fw_tbl.type, + dest_tbl->fw_tbl.id, &output); if (!ret) { - action->dest_tbl.fw_tbl.tx_icm_addr = + dest_tbl->fw_tbl.tx_icm_addr = output.sw_owner_icm_root_1; - action->dest_tbl.fw_tbl.rx_icm_addr = + dest_tbl->fw_tbl.rx_icm_addr = output.sw_owner_icm_root_0; } else { mlx5dr_err(dmn, @@ -422,50 +424,50 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, } } attr.final_icm_addr = rx_rule ? - action->dest_tbl.fw_tbl.rx_icm_addr : - action->dest_tbl.fw_tbl.tx_icm_addr; + dest_tbl->fw_tbl.rx_icm_addr : + dest_tbl->fw_tbl.tx_icm_addr; } break; case DR_ACTION_TYP_QP: mlx5dr_info(dmn, "Domain doesn't support QP\n"); goto out_invalid_arg; case DR_ACTION_TYP_CTR: - attr.ctr_id = action->ctr.ctr_id + - action->ctr.offeset; + attr.ctr_id = action->ctr->ctr_id + + action->ctr->offeset; break; case DR_ACTION_TYP_TAG: - attr.flow_tag = action->flow_tag; + attr.flow_tag = action->flow_tag->flow_tag; break; case DR_ACTION_TYP_TNL_L2_TO_L2: break; case DR_ACTION_TYP_TNL_L3_TO_L2: - attr.decap_index = action->rewrite.index; - attr.decap_actions = action->rewrite.num_of_actions; + attr.decap_index = action->rewrite->index; + attr.decap_actions = action->rewrite->num_of_actions; attr.decap_with_vlan = attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS; break; case DR_ACTION_TYP_MODIFY_HDR: - attr.modify_index = action->rewrite.index; - attr.modify_actions = action->rewrite.num_of_actions; - recalc_cs_required = action->rewrite.modify_ttl && + attr.modify_index = action->rewrite->index; + attr.modify_actions = action->rewrite->num_of_actions; + recalc_cs_required = action->rewrite->modify_ttl && !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps); break; case DR_ACTION_TYP_L2_TO_TNL_L2: case DR_ACTION_TYP_L2_TO_TNL_L3: - attr.reformat_size = action->reformat.reformat_size; - attr.reformat_id = action->reformat.reformat_id; + attr.reformat_size = action->reformat->reformat_size; + attr.reformat_id = action->reformat->reformat_id; break; case DR_ACTION_TYP_VPORT: - attr.hit_gvmi = action->vport.caps->vhca_gvmi; + attr.hit_gvmi = action->vport->caps->vhca_gvmi; dest_action = action; if (rx_rule) { /* Loopback on WIRE vport is not supported */ - if (action->vport.caps->num == WIRE_PORT) + if (action->vport->caps->num == WIRE_PORT) goto out_invalid_arg; - attr.final_icm_addr = action->vport.caps->icm_address_rx; + attr.final_icm_addr = action->vport->caps->icm_address_rx; } else { - attr.final_icm_addr = action->vport.caps->icm_address_tx; + attr.final_icm_addr = action->vport->caps->icm_address_tx; } break; case DR_ACTION_TYP_POP_VLAN: @@ -477,7 +479,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, if (attr.vlans.count == MLX5DR_MAX_VLANS) return -EINVAL; - attr.vlans.headers[attr.vlans.count++] = action->push_vlan.vlan_hdr; + attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr; break; default: goto out_invalid_arg; @@ -530,17 +532,37 @@ out_invalid_arg: return -EINVAL; } +static unsigned int action_size[DR_ACTION_TYP_MAX] = { + [DR_ACTION_TYP_TNL_L2_TO_L2] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_L2_TO_TNL_L2] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_TNL_L3_TO_L2] = sizeof(struct mlx5dr_action_rewrite), + [DR_ACTION_TYP_L2_TO_TNL_L3] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_FT] = sizeof(struct mlx5dr_action_dest_tbl), + [DR_ACTION_TYP_CTR] = sizeof(struct mlx5dr_action_ctr), + [DR_ACTION_TYP_TAG] = sizeof(struct mlx5dr_action_flow_tag), + [DR_ACTION_TYP_MODIFY_HDR] = sizeof(struct mlx5dr_action_rewrite), + [DR_ACTION_TYP_VPORT] = sizeof(struct mlx5dr_action_vport), + [DR_ACTION_TYP_PUSH_VLAN] = sizeof(struct mlx5dr_action_push_vlan), +}; + static struct mlx5dr_action * dr_action_create_generic(enum mlx5dr_action_type action_type) { struct mlx5dr_action *action; + int extra_size; + + if (action_type < DR_ACTION_TYP_MAX) + extra_size = action_size[action_type]; + else + return NULL; - action = kzalloc(sizeof(*action), GFP_KERNEL); + action = kzalloc(sizeof(*action) + extra_size, GFP_KERNEL); if (!action) return NULL; action->action_type = action_type; refcount_set(&action->refcount, 1); + action->data = action + 1; return action; } @@ -559,10 +581,10 @@ mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num) if (!action) return NULL; - action->dest_tbl.is_fw_tbl = true; - action->dest_tbl.fw_tbl.dmn = dmn; - action->dest_tbl.fw_tbl.id = table_num; - action->dest_tbl.fw_tbl.type = FS_FT_FDB; + action->dest_tbl->is_fw_tbl = true; + action->dest_tbl->fw_tbl.dmn = dmn; + action->dest_tbl->fw_tbl.id = table_num; + action->dest_tbl->fw_tbl.type = FS_FT_FDB; refcount_inc(&dmn->refcount); return action; @@ -579,7 +601,7 @@ mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) if (!action) goto dec_ref; - action->dest_tbl.tbl = tbl; + action->dest_tbl->tbl = tbl; return action; @@ -624,12 +646,12 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, case DR_ACTION_TYP_VPORT: hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - hw_dests[i].vport.num = dest_action->vport.caps->num; - hw_dests[i].vport.vhca_id = dest_action->vport.caps->vhca_gvmi; + hw_dests[i].vport.num = dest_action->vport->caps->num; + hw_dests[i].vport.vhca_id = dest_action->vport->caps->vhca_gvmi; if (reformat_action) { reformat_req = true; hw_dests[i].vport.reformat_id = - reformat_action->reformat.reformat_id; + reformat_action->reformat->reformat_id; ref_actions[num_of_ref++] = reformat_action; hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; } @@ -637,10 +659,10 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, case DR_ACTION_TYP_FT: hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - if (dest_action->dest_tbl.is_fw_tbl) - hw_dests[i].ft_id = dest_action->dest_tbl.fw_tbl.id; + if (dest_action->dest_tbl->is_fw_tbl) + hw_dests[i].ft_id = dest_action->dest_tbl->fw_tbl.id; else - hw_dests[i].ft_id = dest_action->dest_tbl.tbl->table_id; + hw_dests[i].ft_id = dest_action->dest_tbl->tbl->table_id; break; default: @@ -657,8 +679,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, hw_dests, num_of_dests, reformat_req, - &action->dest_tbl.fw_tbl.id, - &action->dest_tbl.fw_tbl.group_id); + &action->dest_tbl->fw_tbl.id, + &action->dest_tbl->fw_tbl.group_id); if (ret) goto free_action; @@ -667,11 +689,11 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, for (i = 0; i < num_of_ref; i++) refcount_inc(&ref_actions[i]->refcount); - action->dest_tbl.is_fw_tbl = true; - action->dest_tbl.fw_tbl.dmn = dmn; - action->dest_tbl.fw_tbl.type = FS_FT_FDB; - action->dest_tbl.fw_tbl.ref_actions = ref_actions; - action->dest_tbl.fw_tbl.num_of_ref_actions = num_of_ref; + action->dest_tbl->is_fw_tbl = true; + action->dest_tbl->fw_tbl.dmn = dmn; + action->dest_tbl->fw_tbl.type = FS_FT_FDB; + action->dest_tbl->fw_tbl.ref_actions = ref_actions; + action->dest_tbl->fw_tbl.num_of_ref_actions = num_of_ref; kfree(hw_dests); @@ -696,10 +718,10 @@ mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn, if (!action) return NULL; - action->dest_tbl.is_fw_tbl = 1; - action->dest_tbl.fw_tbl.type = ft->type; - action->dest_tbl.fw_tbl.id = ft->id; - action->dest_tbl.fw_tbl.dmn = dmn; + action->dest_tbl->is_fw_tbl = 1; + action->dest_tbl->fw_tbl.type = ft->type; + action->dest_tbl->fw_tbl.id = ft->id; + action->dest_tbl->fw_tbl.dmn = dmn; refcount_inc(&dmn->refcount); @@ -715,7 +737,7 @@ mlx5dr_action_create_flow_counter(u32 counter_id) if (!action) return NULL; - action->ctr.ctr_id = counter_id; + action->ctr->ctr_id = counter_id; return action; } @@ -728,7 +750,7 @@ struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value) if (!action) return NULL; - action->flow_tag = tag_value & 0xffffff; + action->flow_tag->flow_tag = tag_value & 0xffffff; return action; } @@ -794,8 +816,8 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, if (ret) return ret; - action->reformat.reformat_id = reformat_id; - action->reformat.reformat_size = data_sz; + action->reformat->reformat_id = reformat_id; + action->reformat->reformat_size = data_sz; return 0; } case DR_ACTION_TYP_TNL_L2_TO_L2: @@ -811,28 +833,28 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, data, data_sz, hw_actions, ACTION_CACHE_LINE_SIZE, - &action->rewrite.num_of_actions); + &action->rewrite->num_of_actions); if (ret) { mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n"); return ret; } - action->rewrite.chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, - DR_CHUNK_SIZE_8); - if (!action->rewrite.chunk) { + action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, + DR_CHUNK_SIZE_8); + if (!action->rewrite->chunk) { mlx5dr_dbg(dmn, "Failed allocating modify header chunk\n"); return -ENOMEM; } - action->rewrite.data = (void *)hw_actions; - action->rewrite.index = (action->rewrite.chunk->icm_addr - + action->rewrite->data = (void *)hw_actions; + action->rewrite->index = (action->rewrite->chunk->icm_addr - dmn->info.caps.hdr_modify_icm_addr) / ACTION_CACHE_LINE_SIZE; ret = mlx5dr_send_postsend_action(dmn, action); if (ret) { mlx5dr_dbg(dmn, "Writing decap l3 actions to ICM failed\n"); - mlx5dr_icm_free_chunk(action->rewrite.chunk); + mlx5dr_icm_free_chunk(action->rewrite->chunk); return ret; } return 0; @@ -867,7 +889,7 @@ struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn, if (!action) return NULL; - action->push_vlan.vlan_hdr = vlan_hdr_h; + action->push_vlan->vlan_hdr = vlan_hdr_h; return action; } @@ -898,7 +920,7 @@ mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, if (!action) goto dec_ref; - action->reformat.dmn = dmn; + action->reformat->dmn = dmn; ret = dr_action_create_reformat_action(dmn, data_sz, @@ -1104,17 +1126,17 @@ dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action, const __be64 *sw_action) { u16 sw_field = MLX5_GET(set_action_in, sw_action, field); - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { - action->rewrite.allow_rx = 0; + action->rewrite->allow_rx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", sw_field); return -EINVAL; } } else if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { - action->rewrite.allow_tx = 0; + action->rewrite->allow_tx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", sw_field); @@ -1122,7 +1144,7 @@ dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action, } } - if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) { + if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) { mlx5dr_dbg(dmn, "Modify SET actions not supported on both RX and TX\n"); return -EINVAL; } @@ -1135,7 +1157,7 @@ dr_action_modify_check_add_field_limitation(struct mlx5dr_action *action, const __be64 *sw_action) { u16 sw_field = MLX5_GET(set_action_in, sw_action, field); - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL && sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT && @@ -1153,7 +1175,7 @@ static int dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, const __be64 *sw_action) { - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; u16 sw_fields[2]; int i; @@ -1162,14 +1184,14 @@ dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, for (i = 0; i < 2; i++) { if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { - action->rewrite.allow_rx = 0; + action->rewrite->allow_rx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", sw_fields[i]); return -EINVAL; } } else if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { - action->rewrite.allow_tx = 0; + action->rewrite->allow_tx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", sw_fields[i]); @@ -1178,7 +1200,7 @@ dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, } } - if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) { + if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) { mlx5dr_dbg(dmn, "Modify copy actions not supported on both RX and TX\n"); return -EINVAL; } @@ -1190,7 +1212,7 @@ static int dr_action_modify_check_field_limitation(struct mlx5dr_action *action, const __be64 *sw_action) { - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; u8 action_type; int ret; @@ -1239,7 +1261,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, { const struct mlx5dr_ste_action_modify_field *hw_dst_action_info; const struct mlx5dr_ste_action_modify_field *hw_src_action_info; - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; int ret, i, hw_idx = 0; __be64 *sw_action; __be64 hw_action; @@ -1249,8 +1271,8 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, *modify_ttl = false; - action->rewrite.allow_rx = 1; - action->rewrite.allow_tx = 1; + action->rewrite->allow_rx = 1; + action->rewrite->allow_tx = 1; for (i = 0; i < num_sw_actions; i++) { sw_action = &sw_actions[i]; @@ -1358,13 +1380,13 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, if (ret) goto free_hw_actions; - action->rewrite.chunk = chunk; - action->rewrite.modify_ttl = modify_ttl; - action->rewrite.data = (u8 *)hw_actions; - action->rewrite.num_of_actions = num_hw_actions; - action->rewrite.index = (chunk->icm_addr - - dmn->info.caps.hdr_modify_icm_addr) / - ACTION_CACHE_LINE_SIZE; + action->rewrite->chunk = chunk; + action->rewrite->modify_ttl = modify_ttl; + action->rewrite->data = (u8 *)hw_actions; + action->rewrite->num_of_actions = num_hw_actions; + action->rewrite->index = (chunk->icm_addr - + dmn->info.caps.hdr_modify_icm_addr) / + ACTION_CACHE_LINE_SIZE; ret = mlx5dr_send_postsend_action(dmn, action); if (ret) @@ -1399,7 +1421,7 @@ mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn, if (!action) goto dec_ref; - action->rewrite.dmn = dmn; + action->rewrite->dmn = dmn; ret = dr_action_create_modify_action(dmn, actions_sz, @@ -1451,8 +1473,8 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, if (!action) return NULL; - action->vport.dmn = vport_dmn; - action->vport.caps = vport_cap; + action->vport->dmn = vport_dmn; + action->vport->caps = vport_cap; return action; } @@ -1464,44 +1486,44 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) switch (action->action_type) { case DR_ACTION_TYP_FT: - if (action->dest_tbl.is_fw_tbl) - refcount_dec(&action->dest_tbl.fw_tbl.dmn->refcount); + if (action->dest_tbl->is_fw_tbl) + refcount_dec(&action->dest_tbl->fw_tbl.dmn->refcount); else - refcount_dec(&action->dest_tbl.tbl->refcount); + refcount_dec(&action->dest_tbl->tbl->refcount); - if (action->dest_tbl.is_fw_tbl && - action->dest_tbl.fw_tbl.num_of_ref_actions) { + if (action->dest_tbl->is_fw_tbl && + action->dest_tbl->fw_tbl.num_of_ref_actions) { struct mlx5dr_action **ref_actions; int i; - ref_actions = action->dest_tbl.fw_tbl.ref_actions; - for (i = 0; i < action->dest_tbl.fw_tbl.num_of_ref_actions; i++) + ref_actions = action->dest_tbl->fw_tbl.ref_actions; + for (i = 0; i < action->dest_tbl->fw_tbl.num_of_ref_actions; i++) refcount_dec(&ref_actions[i]->refcount); kfree(ref_actions); - mlx5dr_fw_destroy_md_tbl(action->dest_tbl.fw_tbl.dmn, - action->dest_tbl.fw_tbl.id, - action->dest_tbl.fw_tbl.group_id); + mlx5dr_fw_destroy_md_tbl(action->dest_tbl->fw_tbl.dmn, + action->dest_tbl->fw_tbl.id, + action->dest_tbl->fw_tbl.group_id); } break; case DR_ACTION_TYP_TNL_L2_TO_L2: - refcount_dec(&action->reformat.dmn->refcount); + refcount_dec(&action->reformat->dmn->refcount); break; case DR_ACTION_TYP_TNL_L3_TO_L2: - mlx5dr_icm_free_chunk(action->rewrite.chunk); - refcount_dec(&action->reformat.dmn->refcount); + mlx5dr_icm_free_chunk(action->rewrite->chunk); + refcount_dec(&action->rewrite->dmn->refcount); break; case DR_ACTION_TYP_L2_TO_TNL_L2: case DR_ACTION_TYP_L2_TO_TNL_L3: - mlx5dr_cmd_destroy_reformat_ctx((action->reformat.dmn)->mdev, - action->reformat.reformat_id); - refcount_dec(&action->reformat.dmn->refcount); + mlx5dr_cmd_destroy_reformat_ctx((action->reformat->dmn)->mdev, + action->reformat->reformat_id); + refcount_dec(&action->reformat->dmn->refcount); break; case DR_ACTION_TYP_MODIFY_HDR: - mlx5dr_icm_free_chunk(action->rewrite.chunk); - kfree(action->rewrite.data); - refcount_dec(&action->rewrite.dmn->refcount); + mlx5dr_icm_free_chunk(action->rewrite->chunk); + kfree(action->rewrite->data); + refcount_dec(&action->rewrite->dmn->refcount); break; default: break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 30b0136b5bc7..5970cb8fc0c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -85,15 +85,53 @@ int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, return 0; } +static int dr_cmd_query_nic_vport_roce_en(struct mlx5_core_dev *mdev, + u16 vport, bool *roce_en) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; + int err; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, !!vport); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *roce_en = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.roce_en); + return 0; +} + int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, struct mlx5dr_cmd_caps *caps) { + bool roce_en; + int err; + caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required); caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version); + if (MLX5_CAP_GEN(mdev, roce)) { + err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en); + if (err) + return err; + + caps->roce_caps.roce_en = roce_en; + caps->roce_caps.fl_rc_qp_when_roce_disabled = + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled); + caps->roce_caps.fl_rc_qp_when_roce_enabled = + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled); + } + + caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new); + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) { caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); @@ -106,6 +144,34 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1); } + if (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED) + caps->flex_parser_id_geneve_tlv_option_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_geneve_tlv_option_0); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED) + caps->flex_parser_id_mpls_over_gre = + MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre); + + if (caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED) + caps->flex_parser_id_mpls_over_udp = + MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED) + caps->flex_parser_id_gtpu_dw_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_0); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED) + caps->flex_parser_id_gtpu_teid = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_teid); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED) + caps->flex_parser_id_gtpu_dw_2 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_2); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED) + caps->flex_parser_id_gtpu_first_ext_dw_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_first_ext_dw_0); + caps->nic_rx_drop_address = MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address); caps->nic_tx_drop_address = @@ -287,7 +353,7 @@ int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, u32 *in; int err; - in = kzalloc(inlen, GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -302,7 +368,7 @@ int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, *group_id = MLX5_GET(create_flow_group_out, out, group_id); out: - kfree(in); + kvfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index 15673cd10039..6f6191d1d5a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -92,15 +92,17 @@ static bool dr_mask_is_tnl_gre_set(struct mlx5dr_match_misc *misc) misc->gre_k_present || misc->gre_s_present); } -#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET(_misc2, gre_udp) ( \ - (_misc2).outer_first_mpls_over_##gre_udp##_label || \ - (_misc2).outer_first_mpls_over_##gre_udp##_exp || \ - (_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \ - (_misc2).outer_first_mpls_over_##gre_udp##_ttl) - -#define DR_MASK_IS_TNL_MPLS_SET(_misc2) ( \ - DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \ - DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp)) +#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ + (_misc)->outer_first_mpls_over_gre_label || \ + (_misc)->outer_first_mpls_over_gre_exp || \ + (_misc)->outer_first_mpls_over_gre_s_bos || \ + (_misc)->outer_first_mpls_over_gre_ttl) + +#define DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\ + (_misc)->outer_first_mpls_over_udp_label || \ + (_misc)->outer_first_mpls_over_udp_exp || \ + (_misc)->outer_first_mpls_over_udp_s_bos || \ + (_misc)->outer_first_mpls_over_udp_ttl) static bool dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) @@ -133,6 +135,11 @@ static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc) misc->geneve_opt_len; } +static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3) +{ + return misc3->geneve_tlv_option_0_data; +} + static bool dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) { @@ -148,6 +155,109 @@ dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask, dr_matcher_supp_tnl_geneve(&dmn->info.caps); } +static bool dr_mask_is_tnl_gtpu_set(struct mlx5dr_match_misc3 *misc3) +{ + return misc3->gtpu_msg_flags || misc3->gtpu_msg_type || misc3->gtpu_teid; +} + +static bool dr_matcher_supp_tnl_gtpu(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_gtpu_set(&mask->misc3) && + dr_matcher_supp_tnl_gtpu(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_dw_0(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_dw_0(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_dw_0 && + dr_matcher_supp_tnl_gtpu_dw_0(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_teid(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_teid(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_teid && + dr_matcher_supp_tnl_gtpu_teid(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_dw_2(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_dw_2(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_dw_2 && + dr_matcher_supp_tnl_gtpu_dw_2(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_first_ext(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_first_ext(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_first_ext_dw_0 && + dr_matcher_supp_tnl_gtpu_first_ext(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_gtpu_flex_parser_0(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_0) && + dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_teid) && + dr_mask_is_tnl_gtpu_teid(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_2) && + dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_first_ext_dw_0) && + dr_mask_is_tnl_gtpu_first_ext(mask, dmn)); +} + +static bool dr_mask_is_tnl_gtpu_flex_parser_1(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_0) && + dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_teid) && + dr_mask_is_tnl_gtpu_teid(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_2) && + dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_first_ext_dw_0) && + dr_mask_is_tnl_gtpu_first_ext(mask, dmn)); +} + +static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_gtpu_flex_parser_0(mask, dmn) || + dr_mask_is_tnl_gtpu_flex_parser_1(mask, dmn) || + dr_mask_is_tnl_gtpu(mask, dmn); +} + static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps) { return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || @@ -199,6 +309,65 @@ static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc) return (misc->source_sqn || misc->source_port); } +static bool dr_mask_is_flex_parser_id_0_3_set(u32 flex_parser_id, + u32 flex_parser_value) +{ + if (flex_parser_id) + return flex_parser_id <= DR_STE_MAX_FLEX_0_ID; + + /* Using flex_parser 0 means that id is zero, thus value must be set. */ + return flex_parser_value; +} + +static bool dr_mask_is_flex_parser_0_3_set(struct mlx5dr_match_misc4 *misc4) +{ + return (dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_0, + misc4->prog_sample_field_value_0) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_1, + misc4->prog_sample_field_value_1) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_2, + misc4->prog_sample_field_value_2) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_3, + misc4->prog_sample_field_value_3)); +} + +static bool dr_mask_is_flex_parser_id_4_7_set(u32 flex_parser_id) +{ + return flex_parser_id > DR_STE_MAX_FLEX_0_ID && + flex_parser_id <= DR_STE_MAX_FLEX_1_ID; +} + +static bool dr_mask_is_flex_parser_4_7_set(struct mlx5dr_match_misc4 *misc4) +{ + return (dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_0) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_1) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_2) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_3)); +} + +static int dr_matcher_supp_tnl_mpls_over_gre(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED; +} + +static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(&mask->misc2) && + dr_matcher_supp_tnl_mpls_over_gre(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED; +} + +static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) && + dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps); +} int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, enum mlx5dr_ipv outer_ipv, @@ -251,6 +420,9 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3) mask.misc3 = matcher->mask.misc3; + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) + mask.misc4 = matcher->mask.misc4; + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, &matcher->mask, NULL); if (ret) @@ -321,9 +493,28 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_tnl_vxlan_gpe(&mask, dmn)) mlx5dr_ste_build_tnl_vxlan_gpe(ste_ctx, &sb[idx++], &mask, inner, rx); - else if (dr_mask_is_tnl_geneve(&mask, dmn)) + else if (dr_mask_is_tnl_geneve(&mask, dmn)) { mlx5dr_ste_build_tnl_geneve(ste_ctx, &sb[idx++], &mask, inner, rx); + if (dr_mask_is_tnl_geneve_tlv_opt(&mask.misc3)) + mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + } else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) { + if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gtpu_flex_parser_1(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu_flex_parser_1(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gtpu(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++], + &mask, inner, rx); + } if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++], @@ -333,17 +524,20 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++], &mask, inner, rx); - if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2)) - mlx5dr_ste_build_tnl_mpls(ste_ctx, &sb[idx++], - &mask, inner, rx); + if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_icmp(&mask, dmn)) + mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); - if (dr_mask_is_icmp(&mask, dmn)) { - ret = mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++], - &mask, &dmn->info.caps, - inner, rx); - if (ret) - return ret; - } if (dr_mask_is_tnl_gre_set(&mask.misc)) mlx5dr_ste_build_tnl_gre(ste_ctx, &sb[idx++], &mask, inner, rx); @@ -404,10 +598,26 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++], &mask, inner, rx); - if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2)) - mlx5dr_ste_build_tnl_mpls(ste_ctx, &sb[idx++], - &mask, inner, rx); + if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); } + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) { + if (dr_mask_is_flex_parser_0_3_set(&mask.misc4)) + mlx5dr_ste_build_flex_parser_0(ste_ctx, &sb[idx++], + &mask, false, rx); + + if (dr_mask_is_flex_parser_4_7_set(&mask.misc4)) + mlx5dr_ste_build_flex_parser_1(ste_ctx, &sb[idx++], + &mask, false, rx); + } + /* Empty matcher, takes all */ if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY) mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index b337d6626bff..43356fad53de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -952,6 +952,17 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, return false; } } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC4) { + s_idx = offsetof(struct mlx5dr_match_param, misc4); + e_idx = min(s_idx + sizeof(param->misc4), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, + "Rule misc4 parameters contains a value not specified by mask\n"); + return false; + } + } return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 8a6a56f9dc4e..12cf323a5943 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -32,6 +32,7 @@ struct dr_qp_rtr_attr { u8 min_rnr_timer; u8 sgid_index; u16 udp_src_port; + u8 fl:1; }; struct dr_qp_rts_attr { @@ -45,6 +46,7 @@ struct dr_qp_init_attr { u32 pdn; u32 max_send_wr; struct mlx5_uars_page *uar; + u8 isolate_vl_tc:1; }; static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64) @@ -157,6 +159,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc); MLX5_SET(qpc, qpc, pd, attr->pdn); MLX5_SET(qpc, qpc, uar_page, attr->uar->index); MLX5_SET(qpc, qpc, log_page_size, @@ -213,7 +216,7 @@ static void dr_destroy_qp(struct mlx5_core_dev *mdev, static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) { dma_wmb(); - *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff); + *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff); /* After wmb() the hw aware of new work */ wmb(); @@ -223,7 +226,7 @@ static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, u32 rkey, struct dr_data_seg *data_seg, - u32 opcode, int nreq) + u32 opcode, bool notify_hw) { struct mlx5_wqe_raddr_seg *wq_raddr; struct mlx5_wqe_ctrl_seg *wq_ctrl; @@ -255,16 +258,16 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++; - if (nreq) + if (notify_hw) dr_cmd_notify_hw(dr_qp, wq_ctrl); } static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) { dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, - &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0); + &send_info->write, MLX5_OPCODE_RDMA_WRITE, false); dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, - &send_info->read, MLX5_OPCODE_RDMA_READ, 1); + &send_info->read, MLX5_OPCODE_RDMA_READ, true); } /** @@ -406,7 +409,7 @@ static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, alloc_size = *num_stes * DR_STE_SIZE; } - *data = kzalloc(alloc_size, GFP_KERNEL); + *data = kvzalloc(alloc_size, GFP_KERNEL); if (!*data) return -ENOMEM; @@ -505,7 +508,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, } out_free: - kfree(data); + kvfree(data); return ret; } @@ -562,7 +565,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, } out_free: - kfree(data); + kvfree(data); return ret; } @@ -572,12 +575,12 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, struct postsend_info send_info = {}; int ret; - send_info.write.addr = (uintptr_t)action->rewrite.data; - send_info.write.length = action->rewrite.num_of_actions * + send_info.write.addr = (uintptr_t)action->rewrite->data; + send_info.write.length = action->rewrite->num_of_actions * DR_MODIFY_ACTION_SIZE; send_info.write.lkey = 0; - send_info.remote_addr = action->rewrite.chunk->mr_addr; - send_info.rkey = action->rewrite.chunk->rkey; + send_info.remote_addr = action->rewrite->chunk->mr_addr; + send_info.rkey = action->rewrite->chunk->rkey; ret = dr_postsend_icm_data(dmn, &send_info); @@ -650,6 +653,7 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, attr->udp_src_port); MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num); + MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl); MLX5_SET(qpc, qpc, min_rnr_nak, 1); MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); @@ -658,6 +662,19 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, return mlx5_cmd_exec_in(mdev, init2rtr_qp, in); } +static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps) +{ + /* Check whether RC RoCE QP creation with force loopback is allowed. + * There are two separate capability bits for this: + * - force loopback when RoCE is enabled + * - force loopback when RoCE is disabled + */ + return ((caps->roce_caps.roce_en && + caps->roce_caps.fl_rc_qp_when_roce_enabled) || + (!caps->roce_caps.roce_en && + caps->roce_caps.fl_rc_qp_when_roce_disabled)); +} + static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) { struct mlx5dr_qp *dr_qp = dmn->send_ring->qp; @@ -676,17 +693,26 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) } /* RTR */ - ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr); - if (ret) - return ret; - rtr_attr.mtu = mtu; rtr_attr.qp_num = dr_qp->qpn; rtr_attr.min_rnr_timer = 12; rtr_attr.port_num = port; - rtr_attr.sgid_index = gid_index; rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; + /* If QP creation with force loopback is allowed, then there + * is no need for GID index when creating the QP. + * Otherwise we query GID attributes and use GID index. + */ + rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps); + if (!rtr_attr.fl) { + ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, + &rtr_attr.dgid_attr); + if (ret) + return ret; + + rtr_attr.sgid_index = gid_index; + } + ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); if (ret) { mlx5dr_err(dmn, "Failed modify QP init2rtr\n"); @@ -900,6 +926,11 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) init_attr.pdn = dmn->pdn; init_attr.uar = dmn->uar; init_attr.max_send_wr = QUEUE_SIZE; + + /* Isolated VL is applicable only if force loopback is supported */ + if (dr_send_allow_fl(&dmn->info.caps)) + init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc; + spin_lock_init(&dmn->send_ring->lock); dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index f49abc7a4b9b..9b1529137cba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -852,6 +852,35 @@ static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec) spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code); spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type); spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code); + spec->geneve_tlv_option_0_data = + MLX5_GET(fte_match_set_misc3, mask, geneve_tlv_option_0_data); + spec->gtpu_msg_flags = MLX5_GET(fte_match_set_misc3, mask, gtpu_msg_flags); + spec->gtpu_msg_type = MLX5_GET(fte_match_set_misc3, mask, gtpu_msg_type); + spec->gtpu_teid = MLX5_GET(fte_match_set_misc3, mask, gtpu_teid); + spec->gtpu_dw_0 = MLX5_GET(fte_match_set_misc3, mask, gtpu_dw_0); + spec->gtpu_dw_2 = MLX5_GET(fte_match_set_misc3, mask, gtpu_dw_2); + spec->gtpu_first_ext_dw_0 = + MLX5_GET(fte_match_set_misc3, mask, gtpu_first_ext_dw_0); +} + +static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec) +{ + spec->prog_sample_field_id_0 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_0); + spec->prog_sample_field_value_0 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_0); + spec->prog_sample_field_id_1 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_1); + spec->prog_sample_field_value_1 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_1); + spec->prog_sample_field_id_2 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_2); + spec->prog_sample_field_value_2 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_2); + spec->prog_sample_field_id_3 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_3); + spec->prog_sample_field_value_3 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_3); } void mlx5dr_ste_copy_param(u8 match_criteria, @@ -925,6 +954,20 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } dr_ste_copy_mask_misc3(buff, &set_param->misc3); } + + param_location += sizeof(struct mlx5dr_match_misc3); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC4) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc4)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc4(buff, &set_param->misc4); + } } void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx, @@ -1051,26 +1094,40 @@ void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_tnl_gre_init(sb, mask); } -void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + return ste_ctx->build_tnl_mpls_over_gre_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) { sb->rx = rx; sb->inner = inner; - ste_ctx->build_tnl_mpls_init(sb, mask); + sb->caps = caps; + return ste_ctx->build_tnl_mpls_over_udp_init(sb, mask); } -int mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_cmd_caps *caps, - bool inner, bool rx) +void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) { sb->rx = rx; sb->inner = inner; sb->caps = caps; - return ste_ctx->build_icmp_init(sb, mask); + ste_ctx->build_icmp_init(sb, mask); } void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, @@ -1113,6 +1170,52 @@ void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_tnl_geneve_init(sb, mask); } +void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_flex_parser_0_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_flex_parser_1_init(sb, mask); +} + void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -1148,6 +1251,26 @@ void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_src_gvmi_qpn_init(sb, mask); } +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_flex_parser_0_init(sb, mask); +} + +void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_flex_parser_1_init(sb, mask); +} + static struct mlx5dr_ste_ctx *mlx5dr_ste_ctx_arr[] = { [MLX5_STEERING_FORMAT_CONNECTX_5] = &ste_ctx_v0, [MLX5_STEERING_FORMAT_CONNECTX_6DX] = &ste_ctx_v1, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h index 06bcb0ee8f96..992b591bf0c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -62,6 +62,13 @@ in_out##_first_mpls_ttl); \ } while (0) +#define DR_STE_SET_FLEX_PARSER_FIELD(tag, fname, caps, spec) do { \ + u8 parser_id = (caps)->flex_parser_id_##fname; \ + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); \ + *(__be32 *)parser_ptr = cpu_to_be32((spec)->fname);\ + (spec)->fname = 0;\ +} while (0) + #define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ (_misc)->outer_first_mpls_over_gre_label || \ (_misc)->outer_first_mpls_over_gre_exp || \ @@ -86,8 +93,22 @@ enum dr_ste_action_modify_type_l4 { DR_STE_ACTION_MDFY_TYPE_L4_UDP = 0x2, }; +enum { + HDR_MPLS_OFFSET_LABEL = 12, + HDR_MPLS_OFFSET_EXP = 9, + HDR_MPLS_OFFSET_S_BOS = 8, + HDR_MPLS_OFFSET_TTL = 0, +}; + u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask); +static inline u8 * +dr_ste_calc_flex_parser_offset(u8 *tag, u8 parser_id) +{ + /* Calculate tag byte offset based on flex parser id */ + return tag + 4 * (3 - (parser_id % 4)); +} + #define DR_STE_CTX_BUILDER(fname) \ ((*build_##fname##_init)(struct mlx5dr_ste_build *sb, \ struct mlx5dr_match_param *mask)) @@ -106,14 +127,22 @@ struct mlx5dr_ste_ctx { void DR_STE_CTX_BUILDER(mpls); void DR_STE_CTX_BUILDER(tnl_gre); void DR_STE_CTX_BUILDER(tnl_mpls); - int DR_STE_CTX_BUILDER(icmp); + void DR_STE_CTX_BUILDER(tnl_mpls_over_gre); + void DR_STE_CTX_BUILDER(tnl_mpls_over_udp); + void DR_STE_CTX_BUILDER(icmp); void DR_STE_CTX_BUILDER(general_purpose); void DR_STE_CTX_BUILDER(eth_l4_misc); void DR_STE_CTX_BUILDER(tnl_vxlan_gpe); void DR_STE_CTX_BUILDER(tnl_geneve); + void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt); void DR_STE_CTX_BUILDER(register_0); void DR_STE_CTX_BUILDER(register_1); void DR_STE_CTX_BUILDER(src_gvmi_qpn); + void DR_STE_CTX_BUILDER(flex_parser_0); + void DR_STE_CTX_BUILDER(flex_parser_1); + void DR_STE_CTX_BUILDER(tnl_gtpu); + void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0); + void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1); /* Getters and Setters */ void (*ste_init)(u8 *hw_ste_p, u16 lu_type, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index c5f62d2a058f..0757a4e8540e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -1248,32 +1248,29 @@ dr_ste_v0_build_tnl_mpls_tag(struct mlx5dr_match_param *value, u8 *tag) { struct mlx5dr_match_misc2 *misc_2 = &value->misc2; + u32 mpls_hdr; if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2)) { - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, - misc_2, outer_first_mpls_over_gre_label); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, - misc_2, outer_first_mpls_over_gre_exp); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, - misc_2, outer_first_mpls_over_gre_s_bos); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, - misc_2, outer_first_mpls_over_gre_ttl); + mpls_hdr = misc_2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc_2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc_2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc_2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc_2->outer_first_mpls_over_gre_ttl = 0; } else { - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, - misc_2, outer_first_mpls_over_udp_label); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, - misc_2, outer_first_mpls_over_udp_exp); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, - misc_2, outer_first_mpls_over_udp_s_bos); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, - misc_2, outer_first_mpls_over_udp_ttl); + mpls_hdr = misc_2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc_2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc_2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc_2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc_2->outer_first_mpls_over_udp_ttl = 0; } + + MLX5_SET(ste_flex_parser_0, tag, flex_parser_3, mpls_hdr); return 0; } @@ -1288,6 +1285,91 @@ dr_ste_v0_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_tag; } +static int +dr_ste_v0_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_udp_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_udp; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_udp_tag; +} + +static int +dr_ste_v0_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_gre_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_gre; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_gre_tag; +} + #define ICMP_TYPE_OFFSET_FIRST_DW 24 #define ICMP_CODE_OFFSET_FIRST_DW 16 @@ -1300,9 +1382,11 @@ dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value, u32 *icmp_header_data; int dw0_location; int dw1_location; + u8 *parser_ptr; u8 *icmp_type; u8 *icmp_code; bool is_ipv4; + u32 icmp_hdr; is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc_3); if (is_ipv4) { @@ -1319,47 +1403,40 @@ dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value, dw1_location = sb->caps->flex_parser_id_icmpv6_dw1; } - switch (dw0_location) { - case 4: - MLX5_SET(ste_flex_parser_1, tag, flex_parser_4, - (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) | - (*icmp_code << ICMP_TYPE_OFFSET_FIRST_DW)); - - *icmp_type = 0; - *icmp_code = 0; - break; - default: - return -EINVAL; - } + parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw0_location); + icmp_hdr = (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) | + (*icmp_code << ICMP_CODE_OFFSET_FIRST_DW); + *(__be32 *)parser_ptr = cpu_to_be32(icmp_hdr); + *icmp_code = 0; + *icmp_type = 0; - switch (dw1_location) { - case 5: - MLX5_SET(ste_flex_parser_1, tag, flex_parser_5, - *icmp_header_data); - *icmp_header_data = 0; - break; - default: - return -EINVAL; - } + parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw1_location); + *(__be32 *)parser_ptr = cpu_to_be32(*icmp_header_data); + *icmp_header_data = 0; return 0; } -static int +static void dr_ste_v0_build_icmp_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask) { - int ret; + u8 parser_id; + bool is_ipv4; - ret = dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask); - if (ret) - return ret; + dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask); - sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + is_ipv4 = DR_MASK_IS_ICMPV4_SET(&mask->misc3); + parser_id = is_ipv4 ? sb->caps->flex_parser_id_icmp_dw0 : + sb->caps->flex_parser_id_icmpv6_dw0; + sb->lu_type = parser_id > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); sb->ste_build_tag_func = &dr_ste_v0_build_icmp_tag; - - return 0; } static int @@ -1595,6 +1672,185 @@ dr_ste_v0_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v0_build_src_gvmi_qpn_tag; } +static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id, + u32 *misc4_field_value, + bool *parser_is_used, + u8 *tag) +{ + u32 id = *misc4_field_id; + u8 *parser_ptr; + + if (parser_is_used[id]) + return; + + parser_is_used[id] = true; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, id); + + *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value); + *misc4_field_id = 0; + *misc4_field_value = 0; +} + +static int dr_ste_v0_build_flex_parser_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4; + bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {}; + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_0, + &misc_4_mask->prog_sample_field_value_0, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_1, + &misc_4_mask->prog_sample_field_value_1, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_2, + &misc_4_mask->prog_sample_field_value_2, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_3, + &misc_4_mask->prog_sample_field_value_3, + parser_is_used, tag); + + return 0; +} + +static void dr_ste_v0_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag; +} + +static void dr_ste_v0_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag; +} + +static int +dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + + MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3, + misc3->geneve_tlv_option_0_data); + misc3->geneve_tlv_option_0_data = 0; + + return 0; +} + +static void +dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag; +} + +static int dr_ste_v0_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_msg_flags, misc3, + gtpu_msg_flags); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_msg_type, misc3, + gtpu_msg_type); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_teid, misc3, + gtpu_teid); + + return 0; +} + +static void dr_ste_v0_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_gtpu_tag; +} + +static int +dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v0_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag; +} + +static int +dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag; +} + struct mlx5dr_ste_ctx ste_ctx_v0 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init, @@ -1609,14 +1865,22 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = { .build_mpls_init = &dr_ste_v0_build_mpls_init, .build_tnl_gre_init = &dr_ste_v0_build_tnl_gre_init, .build_tnl_mpls_init = &dr_ste_v0_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v0_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v0_build_tnl_mpls_over_gre_init, .build_icmp_init = &dr_ste_v0_build_icmp_init, .build_general_purpose_init = &dr_ste_v0_build_general_purpose_init, .build_eth_l4_misc_init = &dr_ste_v0_build_eth_l4_misc_init, .build_tnl_vxlan_gpe_init = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init, .build_tnl_geneve_init = &dr_ste_v0_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init, .build_register_0_init = &dr_ste_v0_build_register_0_init, .build_register_1_init = &dr_ste_v0_build_register_1_init, .build_src_gvmi_qpn_init = &dr_ste_v0_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v0_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v0_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v0_build_flex_parser_tnl_gtpu_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init, /* Getters and Setters */ .ste_init = &dr_ste_v0_init, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 616ebc38381a..054c2e2b6554 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -1306,6 +1306,88 @@ static void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_tag; } +static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_udp_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_udp; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_udp_tag; +} + +static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_gre_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_gre; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_gre_tag; +} + static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, u8 *tag) @@ -1337,16 +1419,14 @@ static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value, return 0; } -static int dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +static void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask); sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O; sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); sb->ste_build_tag_func = &dr_ste_v1_build_icmp_tag; - - return 0; } static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value, @@ -1571,6 +1651,179 @@ static void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_src_gvmi_qpn_tag; } +static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id, + u32 *misc4_field_value, + bool *parser_is_used, + u8 *tag) +{ + u32 id = *misc4_field_id; + u8 *parser_ptr; + + if (parser_is_used[id]) + return; + + parser_is_used[id] = true; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, id); + + *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value); + *misc4_field_id = 0; + *misc4_field_value = 0; +} + +static int dr_ste_v1_build_felx_parser_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4; + bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {}; + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_0, + &misc_4_mask->prog_sample_field_value_0, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_1, + &misc_4_mask->prog_sample_field_value_1, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_2, + &misc_4_mask->prog_sample_field_value_2, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_3, + &misc_4_mask->prog_sample_field_value_3, + parser_is_used, tag); + + return 0; +} + +static void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; +} + +static void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; + dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + + MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3, + misc3->geneve_tlv_option_0_data); + misc3->geneve_tlv_option_0_data = 0; + + return 0; +} + +static void +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag; +} + +static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_flags, misc3, gtpu_msg_flags); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_type, misc3, gtpu_msg_type); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_teid, misc3, gtpu_teid); + + return 0; +} + +static void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_gtpu_tag; +} + +static int +dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag; +} + +static int +dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag; +} + struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, @@ -1585,14 +1838,23 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { .build_mpls_init = &dr_ste_v1_build_mpls_init, .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, .build_icmp_init = &dr_ste_v1_build_icmp_init, .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, .build_register_0_init = &dr_ste_v1_build_register_0_init, .build_register_1_init = &dr_ste_v1_build_register_1_init, .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + /* Getters and Setters */ .ste_init = &dr_ste_v1_init, .set_next_lu_type = &dr_ste_v1_set_next_lu_type, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index b599b6beb5b9..30ae3cda6d2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -29,7 +29,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, last_htbl = tbl->rx.s_anchor; tbl->rx.default_icm_addr = action ? - action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : + action->dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : tbl->rx.nic_dmn->default_icm_addr; info.type = CONNECT_MISS; @@ -53,7 +53,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, last_htbl = tbl->tx.s_anchor; tbl->tx.default_icm_addr = action ? - action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr : + action->dest_tbl->tbl->tx.s_anchor->chunk->icm_addr : tbl->tx.nic_dmn->default_icm_addr; info.type = CONNECT_MISS; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 4af0e4e6a13c..67460c42a99b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -12,17 +12,30 @@ #include "mlx5_ifc_dr.h" #include "mlx5dr.h" -#define DR_RULE_MAX_STES 17 +#define DR_RULE_MAX_STES 18 #define DR_ACTION_MAX_STES 5 #define WIRE_PORT 0xFFFF #define DR_STE_SVLAN 0x1 #define DR_STE_CVLAN 0x2 #define DR_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4) +#define DR_NUM_OF_FLEX_PARSERS 8 +#define DR_STE_MAX_FLEX_0_ID 3 +#define DR_STE_MAX_FLEX_1_ID 7 #define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg) #define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg) #define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg) +static inline bool dr_is_flex_parser_0_id(u8 parser_id) +{ + return parser_id <= DR_STE_MAX_FLEX_0_ID; +} + +static inline bool dr_is_flex_parser_1_id(u8 parser_id) +{ + return parser_id > DR_STE_MAX_FLEX_0_ID; +} + enum mlx5dr_icm_chunk_size { DR_CHUNK_SIZE_1, DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */ @@ -87,7 +100,8 @@ enum mlx5dr_matcher_criteria { DR_MATCHER_CRITERIA_INNER = 1 << 2, DR_MATCHER_CRITERIA_MISC2 = 1 << 3, DR_MATCHER_CRITERIA_MISC3 = 1 << 4, - DR_MATCHER_CRITERIA_MAX = 1 << 5, + DR_MATCHER_CRITERIA_MISC4 = 1 << 5, + DR_MATCHER_CRITERIA_MAX = 1 << 6, }; enum mlx5dr_action_type { @@ -389,11 +403,21 @@ void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -int mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_cmd_caps *caps, - bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -402,6 +426,25 @@ void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -419,6 +462,14 @@ void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_match_param *mask, struct mlx5dr_domain *dmn, bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx); /* Actions utils */ @@ -646,7 +697,24 @@ struct mlx5dr_match_misc3 { u8 icmpv6_type; u8 icmpv4_code; u8 icmpv4_type; - u8 reserved_auto3[0x1c]; + u32 geneve_tlv_option_0_data; + u8 gtpu_msg_flags; + u8 gtpu_msg_type; + u32 gtpu_teid; + u32 gtpu_dw_2; + u32 gtpu_first_ext_dw_0; + u32 gtpu_dw_0; +}; + +struct mlx5dr_match_misc4 { + u32 prog_sample_field_value_0; + u32 prog_sample_field_id_0; + u32 prog_sample_field_value_1; + u32 prog_sample_field_id_1; + u32 prog_sample_field_value_2; + u32 prog_sample_field_id_2; + u32 prog_sample_field_value_3; + u32 prog_sample_field_id_3; }; struct mlx5dr_match_param { @@ -655,6 +723,7 @@ struct mlx5dr_match_param { struct mlx5dr_match_spec inner; struct mlx5dr_match_misc2 misc2; struct mlx5dr_match_misc3 misc3; + struct mlx5dr_match_misc4 misc4; }; #define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ @@ -678,6 +747,12 @@ struct mlx5dr_cmd_vport_cap { u32 num; }; +struct mlx5dr_roce_cap { + u8 roce_en:1; + u8 fl_rc_qp_when_roce_disabled:1; + u8 fl_rc_qp_when_roce_enabled:1; +}; + struct mlx5dr_cmd_caps { u16 gvmi; u64 nic_rx_drop_address; @@ -692,6 +767,13 @@ struct mlx5dr_cmd_caps { u8 flex_parser_id_icmp_dw1; u8 flex_parser_id_icmpv6_dw0; u8 flex_parser_id_icmpv6_dw1; + u8 flex_parser_id_geneve_tlv_option_0; + u8 flex_parser_id_mpls_over_gre; + u8 flex_parser_id_mpls_over_udp; + u8 flex_parser_id_gtpu_dw_0; + u8 flex_parser_id_gtpu_teid; + u8 flex_parser_id_gtpu_dw_2; + u8 flex_parser_id_gtpu_first_ext_dw_0; u8 max_ft_level; u16 roce_min_src_udp; u8 num_esw_ports; @@ -707,6 +789,8 @@ struct mlx5dr_cmd_caps { struct mlx5dr_esw_caps esw_caps; struct mlx5dr_cmd_vport_cap *vports_caps; bool prio_tag_required; + struct mlx5dr_roce_cap roce_caps; + u8 isolate_vl_tc:1; }; struct mlx5dr_domain_rx_tx { @@ -806,53 +890,71 @@ struct mlx5dr_ste_action_modify_field { u8 l4_type; }; +struct mlx5dr_action_rewrite { + struct mlx5dr_domain *dmn; + struct mlx5dr_icm_chunk *chunk; + u8 *data; + u16 num_of_actions; + u32 index; + u8 allow_rx:1; + u8 allow_tx:1; + u8 modify_ttl:1; +}; + +struct mlx5dr_action_reformat { + struct mlx5dr_domain *dmn; + u32 reformat_id; + u32 reformat_size; +}; + +struct mlx5dr_action_dest_tbl { + u8 is_fw_tbl:1; + union { + struct mlx5dr_table *tbl; + struct { + struct mlx5dr_domain *dmn; + u32 id; + u32 group_id; + enum fs_flow_table_type type; + u64 rx_icm_addr; + u64 tx_icm_addr; + struct mlx5dr_action **ref_actions; + u32 num_of_ref_actions; + } fw_tbl; + }; +}; + +struct mlx5dr_action_ctr { + u32 ctr_id; + u32 offeset; +}; + +struct mlx5dr_action_vport { + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_vport_cap *caps; +}; + +struct mlx5dr_action_push_vlan { + u32 vlan_hdr; /* tpid_pcp_dei_vid */ +}; + +struct mlx5dr_action_flow_tag { + u32 flow_tag; +}; + struct mlx5dr_action { enum mlx5dr_action_type action_type; refcount_t refcount; + union { - struct { - struct mlx5dr_domain *dmn; - struct mlx5dr_icm_chunk *chunk; - u8 *data; - u16 num_of_actions; - u32 index; - u8 allow_rx:1; - u8 allow_tx:1; - u8 modify_ttl:1; - } rewrite; - struct { - struct mlx5dr_domain *dmn; - u32 reformat_id; - u32 reformat_size; - } reformat; - struct { - u8 is_fw_tbl:1; - union { - struct mlx5dr_table *tbl; - struct { - struct mlx5dr_domain *dmn; - u32 id; - u32 group_id; - enum fs_flow_table_type type; - u64 rx_icm_addr; - u64 tx_icm_addr; - struct mlx5dr_action **ref_actions; - u32 num_of_ref_actions; - } fw_tbl; - }; - } dest_tbl; - struct { - u32 ctr_id; - u32 offeset; - } ctr; - struct { - struct mlx5dr_domain *dmn; - struct mlx5dr_cmd_vport_cap *caps; - } vport; - struct { - u32 vlan_hdr; /* tpid_pcp_dei_vid */ - } push_vlan; - u32 flow_tag; + void *data; + struct mlx5dr_action_rewrite *rewrite; + struct mlx5dr_action_reformat *reformat; + struct mlx5dr_action_dest_tbl *dest_tbl; + struct mlx5dr_action_ctr *ctr; + struct mlx5dr_action_vport *vport; + struct mlx5dr_action_push_vlan *push_vlan; + struct mlx5dr_action_flow_tag *flow_tag; }; }; @@ -1063,6 +1165,7 @@ struct mlx5dr_cmd_qp_create_attr { u32 sq_wqe_cnt; u32 rq_wqe_cnt; u32 rq_wqe_shift; + u8 isolate_vl_tc:1; }; int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h index 83df6df6b459..9643ee647f57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -434,10 +434,7 @@ struct mlx5_ifc_ste_gre_bits { }; struct mlx5_ifc_ste_flex_parser_0_bits { - u8 parser_3_label[0x14]; - u8 parser_3_exp[0x3]; - u8 parser_3_s_bos[0x1]; - u8 parser_3_ttl[0x8]; + u8 flex_parser_3[0x20]; u8 flex_parser_2[0x20]; @@ -488,6 +485,17 @@ struct mlx5_ifc_ste_flex_parser_tnl_geneve_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits { + u8 reserved_at_0[0x5]; + u8 gtpu_msg_flags[0x3]; + u8 gtpu_msg_type[0x8]; + u8 reserved_at_10[0x10]; + + u8 gtpu_teid[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_ste_general_purpose_bits { u8 general_purpose_lookup_field[0x20]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 01f075fac276..3091dd014650 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -34,11 +34,6 @@ #include "wq.h" #include "mlx5_core.h" -static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride) -{ - return ((u32)1 << log_sz) << log_stride; -} - int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 97d074d7b78d..f99db88ee884 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -22,6 +22,7 @@ #include <net/red.h> #include <net/vxlan.h> #include <net/flow_offload.h> +#include <net/inet_ecn.h> #include "port.h" #include "core.h" @@ -367,6 +368,20 @@ struct mlxsw_sp_port_type_speed_ops { u32 (*ptys_proto_cap_masked_get)(u32 eth_proto_cap); }; +static inline u8 mlxsw_sp_tunnel_ecn_decap(u8 outer_ecn, u8 inner_ecn, + bool *trap_en) +{ + bool set_ce = false; + + *trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + if (set_ce) + return INET_ECN_CE; + else if (outer_ecn == INET_ECN_ECT_1 && inner_ecn == INET_ECN_ECT_0) + return INET_ECN_ECT_1; + else + return inner_ecn; +} + static inline struct net_device * mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 0bd64169bf81..c8061beed6db 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1059,6 +1059,131 @@ mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info); } +static void +mlxsw_sp_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl)) + return; + + phy_stats->SymbolErrorDuringCarrier = + mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get(ppcnt_pl); +} + +static void +mlxsw_sp_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl)) + return; + + mac_stats->FramesTransmittedOK = + mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl); + mac_stats->FramesReceivedOK = + mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl); + mac_stats->FrameCheckSequenceErrors = + mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl); + mac_stats->AlignmentErrors = + mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl); + mac_stats->OctetsTransmittedOK = + mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl); + mac_stats->OctetsReceivedOK = + mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl); + mac_stats->MulticastFramesXmittedOK = + mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get(ppcnt_pl); + mac_stats->BroadcastFramesXmittedOK = + mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get(ppcnt_pl); + mac_stats->MulticastFramesReceivedOK = + mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl); + mac_stats->BroadcastFramesReceivedOK = + mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get(ppcnt_pl); + mac_stats->InRangeLengthErrors = + mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl); + mac_stats->OutOfRangeLengthField = + mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl); + mac_stats->FrameTooLongErrors = + mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl); +} + +static void +mlxsw_sp_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl)) + return; + + ctrl_stats->MACControlFramesTransmitted = + mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get(ppcnt_pl); + ctrl_stats->MACControlFramesReceived = + mlxsw_reg_ppcnt_a_mac_control_frames_received_get(ppcnt_pl); + ctrl_stats->UnsupportedOpcodesReceived = + mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get(ppcnt_pl); +} + +static const struct ethtool_rmon_hist_range mlxsw_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 8191 }, + { 8192, 10239 }, + {} +}; + +static void +mlxsw_sp_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_RFC_2819_CNT, + 0, ppcnt_pl)) + return; + + rmon->undersize_pkts = + mlxsw_reg_ppcnt_ether_stats_undersize_pkts_get(ppcnt_pl); + rmon->oversize_pkts = + mlxsw_reg_ppcnt_ether_stats_oversize_pkts_get(ppcnt_pl); + rmon->fragments = + mlxsw_reg_ppcnt_ether_stats_fragments_get(ppcnt_pl); + + rmon->hist[0] = mlxsw_reg_ppcnt_ether_stats_pkts64octets_get(ppcnt_pl); + rmon->hist[1] = + mlxsw_reg_ppcnt_ether_stats_pkts65to127octets_get(ppcnt_pl); + rmon->hist[2] = + mlxsw_reg_ppcnt_ether_stats_pkts128to255octets_get(ppcnt_pl); + rmon->hist[3] = + mlxsw_reg_ppcnt_ether_stats_pkts256to511octets_get(ppcnt_pl); + rmon->hist[4] = + mlxsw_reg_ppcnt_ether_stats_pkts512to1023octets_get(ppcnt_pl); + rmon->hist[5] = + mlxsw_reg_ppcnt_ether_stats_pkts1024to1518octets_get(ppcnt_pl); + rmon->hist[6] = + mlxsw_reg_ppcnt_ether_stats_pkts1519to2047octets_get(ppcnt_pl); + rmon->hist[7] = + mlxsw_reg_ppcnt_ether_stats_pkts2048to4095octets_get(ppcnt_pl); + rmon->hist[8] = + mlxsw_reg_ppcnt_ether_stats_pkts4096to8191octets_get(ppcnt_pl); + rmon->hist[9] = + mlxsw_reg_ppcnt_ether_stats_pkts8192to10239octets_get(ppcnt_pl); + + *ranges = mlxsw_rmon_ranges; +} + const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .cap_link_lanes_supported = true, .get_drvinfo = mlxsw_sp_port_get_drvinfo, @@ -1075,6 +1200,10 @@ const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_module_info = mlxsw_sp_get_module_info, .get_module_eeprom = mlxsw_sp_get_module_eeprom, .get_ts_info = mlxsw_sp_get_ts_info, + .get_eth_phy_stats = mlxsw_sp_get_eth_phy_stats, + .get_eth_mac_stats = mlxsw_sp_get_eth_mac_stats, + .get_eth_ctrl_stats = mlxsw_sp_get_eth_ctrl_stats, + .get_rmon_stats = mlxsw_sp_get_rmon_stats, }; struct mlxsw_sp1_port_link_mode { @@ -1230,16 +1359,22 @@ mlxsw_sp1_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd) { + struct mlxsw_sp1_port_link_mode link; int i; - cmd->link_mode = -1; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->lanes = 0; if (!carrier_ok) return; for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) - cmd->link_mode = mlxsw_sp1_port_link_mode[i].mask_ethtool; + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) { + link = mlxsw_sp1_port_link_mode[i]; + ethtool_params_from_link_mode(cmd, + link.mask_ethtool); + } } } @@ -1672,7 +1807,9 @@ mlxsw_sp2_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, struct mlxsw_sp2_port_link_mode link; int i; - cmd->link_mode = -1; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->lanes = 0; if (!carrier_ok) return; @@ -1680,7 +1817,8 @@ mlxsw_sp2_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) { link = mlxsw_sp2_port_link_mode[i]; - cmd->link_mode = link.mask_ethtool[1]; + ethtool_params_from_link_mode(cmd, + link.mask_ethtool[1]); } } } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index b8b08a6a1d10..5facabd86882 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -337,12 +337,11 @@ static int mlxsw_sp_ipip_ecn_decap_init_one(struct mlxsw_sp *mlxsw_sp, u8 inner_ecn, u8 outer_ecn) { char tidem_pl[MLXSW_REG_TIDEM_LEN]; - bool trap_en, set_ce = false; u8 new_inner_ecn; + bool trap_en; - trap_en = __INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); - new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; - + new_inner_ecn = mlxsw_sp_tunnel_ecn_decap(outer_ecn, inner_ecn, + &trap_en); mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn, trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index e5ec595593f4..9eba8fa684ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -909,12 +909,11 @@ static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp, u8 inner_ecn, u8 outer_ecn) { char tndem_pl[MLXSW_REG_TNDEM_LEN]; - bool trap_en, set_ce = false; u8 new_inner_ecn; + bool trap_en; - trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); - new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; - + new_inner_ecn = mlxsw_sp_tunnel_ecn_decap(outer_ecn, inner_ecn, + &trap_en); mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn, trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index baf17c0b2702..04672eb5c7f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -29,7 +29,6 @@ struct mlxsw_sp_qdisc; struct mlxsw_sp_qdisc_ops { enum mlxsw_sp_qdisc_type type; int (*check_params)(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); @@ -48,11 +47,14 @@ struct mlxsw_sp_qdisc_ops { */ void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); + struct mlxsw_sp_qdisc *(*find_class)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 parent); + unsigned int num_classes; }; struct mlxsw_sp_qdisc { u32 handle; - u8 tclass_num; + int tclass_num; u8 prio_bitmap; union { struct red_stats red; @@ -66,11 +68,13 @@ struct mlxsw_sp_qdisc { } stats_base; struct mlxsw_sp_qdisc_ops *ops; + struct mlxsw_sp_qdisc *parent; + struct mlxsw_sp_qdisc *qdiscs; + unsigned int num_classes; }; struct mlxsw_sp_qdisc_state { struct mlxsw_sp_qdisc root_qdisc; - struct mlxsw_sp_qdisc tclass_qdiscs[IEEE_8021QAZ_MAX_TCS]; /* When a PRIO or ETS are added, the invisible FIFOs in their bands are * created first. When notifications for these FIFOs arrive, it is not @@ -85,15 +89,55 @@ struct mlxsw_sp_qdisc_state { */ u32 future_handle; bool future_fifos[IEEE_8021QAZ_MAX_TCS]; + struct mutex lock; /* Protects qdisc state. */ }; static bool -mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle, - enum mlxsw_sp_qdisc_type type) +mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle) +{ + return mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->handle == handle; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk(struct mlxsw_sp_qdisc *qdisc, + struct mlxsw_sp_qdisc *(*pre)(struct mlxsw_sp_qdisc *, + void *), + void *data) +{ + struct mlxsw_sp_qdisc *tmp; + unsigned int i; + + if (pre) { + tmp = pre(qdisc, data); + if (tmp) + return tmp; + } + + if (qdisc->ops) { + for (i = 0; i < qdisc->num_classes; i++) { + tmp = &qdisc->qdiscs[i]; + if (qdisc->ops) { + tmp = mlxsw_sp_qdisc_walk(tmp, pre, data); + if (tmp) + return tmp; + } + } + } + + return NULL; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk_cb_find(struct mlxsw_sp_qdisc *qdisc, void *data) { - return mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops && - mlxsw_sp_qdisc->ops->type == type && - mlxsw_sp_qdisc->handle == handle; + u32 parent = *(u32 *)data; + + if (qdisc->ops && TC_H_MAJ(qdisc->handle) == TC_H_MAJ(parent)) { + if (qdisc->ops->find_class) + return qdisc->ops->find_class(qdisc, parent); + } + + return NULL; } static struct mlxsw_sp_qdisc * @@ -101,39 +145,46 @@ mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, bool root_only) { struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - int tclass, child_index; + if (!qdisc_state) + return NULL; if (parent == TC_H_ROOT) return &qdisc_state->root_qdisc; - - if (root_only || !qdisc_state || - !qdisc_state->root_qdisc.ops || - TC_H_MAJ(parent) != qdisc_state->root_qdisc.handle || - TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS) + if (root_only) return NULL; + return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc, + mlxsw_sp_qdisc_walk_cb_find, &parent); +} - child_index = TC_H_MIN(parent); - tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); - return &qdisc_state->tclass_qdiscs[tclass]; +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk_cb_find_by_handle(struct mlxsw_sp_qdisc *qdisc, void *data) +{ + u32 handle = *(u32 *)data; + + if (qdisc->ops && qdisc->handle == handle) + return qdisc; + return NULL; } static struct mlxsw_sp_qdisc * mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle) { struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - int i; - - if (qdisc_state->root_qdisc.handle == handle) - return &qdisc_state->root_qdisc; - if (qdisc_state->root_qdisc.handle == TC_H_UNSPEC) + if (!qdisc_state) return NULL; + return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc, + mlxsw_sp_qdisc_walk_cb_find_by_handle, + &handle); +} - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - if (qdisc_state->tclass_qdiscs[i].handle == handle) - return &qdisc_state->tclass_qdiscs[i]; +static void +mlxsw_sp_qdisc_reduce_parent_backlog(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *tmp; - return NULL; + for (tmp = mlxsw_sp_qdisc->parent; tmp; tmp = tmp->parent) + tmp->stats_base.backlog -= mlxsw_sp_qdisc->stats_base.backlog; } static int @@ -157,32 +208,48 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, err_hdroom = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); } - if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->destroy) + if (!mlxsw_sp_qdisc->ops) + return 0; + + mlxsw_sp_qdisc_reduce_parent_backlog(mlxsw_sp_qdisc); + if (mlxsw_sp_qdisc->ops->destroy) err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + if (mlxsw_sp_qdisc->ops->clean_stats) + mlxsw_sp_qdisc->ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc); mlxsw_sp_qdisc->handle = TC_H_UNSPEC; mlxsw_sp_qdisc->ops = NULL; - + mlxsw_sp_qdisc->num_classes = 0; + kfree(mlxsw_sp_qdisc->qdiscs); + mlxsw_sp_qdisc->qdiscs = NULL; return err_hdroom ?: err; } -static int -mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - struct mlxsw_sp_qdisc_ops *ops, void *params) +static int mlxsw_sp_qdisc_create(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_ops *ops, void *params) { struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; struct mlxsw_sp_hdroom orig_hdroom; + unsigned int i; int err; - if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type) - /* In case this location contained a different qdisc of the - * same type we can override the old qdisc configuration. - * Otherwise, we need to remove the old qdisc before setting the - * new one. - */ - mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + err = ops->check_params(mlxsw_sp_port, params); + if (err) + return err; + + if (ops->num_classes) { + mlxsw_sp_qdisc->qdiscs = kcalloc(ops->num_classes, + sizeof(*mlxsw_sp_qdisc->qdiscs), + GFP_KERNEL); + if (!mlxsw_sp_qdisc->qdiscs) + return -ENOMEM; + + for (i = 0; i < ops->num_classes; i++) + mlxsw_sp_qdisc->qdiscs[i].parent = mlxsw_sp_qdisc; + } orig_hdroom = *mlxsw_sp_port->hdroom; if (root_qdisc == mlxsw_sp_qdisc) { @@ -198,20 +265,46 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, goto err_hdroom_configure; } - err = ops->check_params(mlxsw_sp_port, mlxsw_sp_qdisc, params); + mlxsw_sp_qdisc->num_classes = ops->num_classes; + mlxsw_sp_qdisc->ops = ops; + mlxsw_sp_qdisc->handle = handle; + err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params); + if (err) + goto err_replace; + + return 0; + +err_replace: + mlxsw_sp_qdisc->handle = TC_H_UNSPEC; + mlxsw_sp_qdisc->ops = NULL; + mlxsw_sp_qdisc->num_classes = 0; + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); +err_hdroom_configure: + kfree(mlxsw_sp_qdisc->qdiscs); + mlxsw_sp_qdisc->qdiscs = NULL; + return err; +} + +static int +mlxsw_sp_qdisc_change(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) +{ + struct mlxsw_sp_qdisc_ops *ops = mlxsw_sp_qdisc->ops; + int err; + + err = ops->check_params(mlxsw_sp_port, params); if (err) - goto err_bad_param; + goto unoffload; err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params); if (err) - goto err_config; + goto unoffload; /* Check if the Qdisc changed. That includes a situation where an * invisible Qdisc replaces another one, or is being added for the * first time. */ - if (mlxsw_sp_qdisc->handle != handle || handle == TC_H_UNSPEC) { - mlxsw_sp_qdisc->ops = ops; + if (mlxsw_sp_qdisc->handle != handle) { if (ops->clean_stats) ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc); } @@ -219,11 +312,8 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, mlxsw_sp_qdisc->handle = handle; return 0; -err_bad_param: -err_config: - mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); -err_hdroom_configure: - if (mlxsw_sp_qdisc->handle == handle && ops->unoffload) +unoffload: + if (ops->unoffload) ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); @@ -231,6 +321,27 @@ err_hdroom_configure: } static int +mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_ops *ops, void *params) +{ + if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type) + /* In case this location contained a different qdisc of the + * same type we can override the old qdisc configuration. + * Otherwise, we need to remove the old qdisc before setting the + * new one. + */ + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + + if (!mlxsw_sp_qdisc->ops) + return mlxsw_sp_qdisc_create(mlxsw_sp_port, handle, + mlxsw_sp_qdisc, ops, params); + else + return mlxsw_sp_qdisc_change(mlxsw_sp_port, handle, + mlxsw_sp_qdisc, params); +} + +static int mlxsw_sp_qdisc_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) @@ -295,7 +406,7 @@ mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, u64 *p_tx_bytes, u64 *p_tx_packets, u64 *p_drops, u64 *p_backlog) { - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; u64 tx_bytes, tx_packets; @@ -395,7 +506,7 @@ static void mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; struct red_stats *red_base; @@ -421,20 +532,12 @@ static int mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; - - if (root_qdisc != mlxsw_sp_qdisc) - root_qdisc->stats_base.backlog -= - mlxsw_sp_qdisc->stats_base.backlog; - return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, mlxsw_sp_qdisc->tclass_num); } static int mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -467,7 +570,7 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct tc_red_qopt_offload_params *p = params; - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; u32 min, max; u64 prob; @@ -512,7 +615,7 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, void *xstats_ptr) { struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base.red; - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; struct red_stats *res = xstats_ptr; int early_drops, pdrops; @@ -536,7 +639,7 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; u64 overlimits; @@ -553,6 +656,13 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_leaf_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 parent) +{ + return NULL; +} + #define MLXSW_SP_PORT_DEFAULT_TCLASS 0 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { @@ -564,10 +674,11 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { .get_stats = mlxsw_sp_qdisc_get_red_stats, .get_xstats = mlxsw_sp_qdisc_get_red_xstats, .clean_stats = mlxsw_sp_setup_tc_qdisc_red_clean_stats, + .find_class = mlxsw_sp_qdisc_leaf_find_class, }; -int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_red_qopt_offload *p) +static int __mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; @@ -581,8 +692,7 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_red, &p->set); - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, - MLXSW_SP_QDISC_RED)) + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { @@ -599,6 +709,18 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } } +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_red(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + static void mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) @@ -622,13 +744,6 @@ static int mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; - - if (root_qdisc != mlxsw_sp_qdisc) - root_qdisc->stats_base.backlog -= - mlxsw_sp_qdisc->stats_base.backlog; - return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, mlxsw_sp_qdisc->tclass_num, 0, @@ -678,7 +793,6 @@ mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p) static int mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_tbf_qopt_offload_replace_params *p = params; @@ -766,10 +880,11 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = { .destroy = mlxsw_sp_qdisc_tbf_destroy, .get_stats = mlxsw_sp_qdisc_get_tbf_stats, .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, + .find_class = mlxsw_sp_qdisc_leaf_find_class, }; -int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_tbf_qopt_offload *p) +static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; @@ -783,8 +898,7 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_tbf, &p->replace_params); - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, - MLXSW_SP_QDISC_TBF)) + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { @@ -798,22 +912,20 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, } } -static int -mlxsw_sp_qdisc_fifo_destroy(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; + int err; - if (root_qdisc != mlxsw_sp_qdisc) - root_qdisc->stats_base.backlog -= - mlxsw_sp_qdisc->stats_base.backlog; - return 0; + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; } static int mlxsw_sp_qdisc_fifo_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { return 0; @@ -841,25 +953,18 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = { .type = MLXSW_SP_QDISC_FIFO, .check_params = mlxsw_sp_qdisc_fifo_check_params, .replace = mlxsw_sp_qdisc_fifo_replace, - .destroy = mlxsw_sp_qdisc_fifo_destroy, .get_stats = mlxsw_sp_qdisc_get_fifo_stats, .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, }; -int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_fifo_qopt_offload *p) +static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p) { struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - int tclass, child_index; + unsigned int band; u32 parent_handle; - /* Invisible FIFOs are tracked in future_handle and future_fifos. Make - * sure that not more than one qdisc is created for a port at a time. - * RTNL is a simple proxy for that. - */ - ASSERT_RTNL(); - mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); if (!mlxsw_sp_qdisc && p->handle == TC_H_UNSPEC) { parent_handle = TC_H_MAJ(p->parent); @@ -872,13 +977,12 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, qdisc_state->future_handle = parent_handle; } - child_index = TC_H_MIN(p->parent); - tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); - if (tclass < IEEE_8021QAZ_MAX_TCS) { + band = TC_H_MIN(p->parent) - 1; + if (band < IEEE_8021QAZ_MAX_TCS) { if (p->command == TC_FIFO_REPLACE) - qdisc_state->future_fifos[tclass] = true; + qdisc_state->future_fifos[band] = true; else if (p->command == TC_FIFO_DESTROY) - qdisc_state->future_fifos[tclass] = false; + qdisc_state->future_fifos[band] = false; } } if (!mlxsw_sp_qdisc) @@ -890,16 +994,12 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_fifo, NULL); } - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, - MLXSW_SP_QDISC_FIFO)) + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { case TC_FIFO_DESTROY: - if (p->handle == mlxsw_sp_qdisc->handle) - return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - mlxsw_sp_qdisc); - return 0; + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); case TC_FIFO_STATS: return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, &p->stats); @@ -910,21 +1010,32 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } -static int -__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) +int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_fifo(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + +static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int i; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - &qdisc_state->tclass_qdiscs[i]); - qdisc_state->tclass_qdiscs[i].prio_bitmap = 0; + &mlxsw_sp_qdisc->qdiscs[i]); + mlxsw_sp_qdisc->qdiscs[i].prio_bitmap = 0; } return 0; @@ -934,7 +1045,7 @@ static int mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); } static int @@ -948,7 +1059,6 @@ __mlxsw_sp_qdisc_ets_check_params(unsigned int nbands) static int mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_prio_qopt_offload_params *p = params; @@ -957,8 +1067,9 @@ mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static int -__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, - unsigned int nbands, +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 handle, unsigned int nbands, const unsigned int *quanta, const unsigned int *weights, const u8 *priomap) @@ -971,7 +1082,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, for (band = 0; band < nbands; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); - child_qdisc = &qdisc_state->tclass_qdiscs[tclass]; + child_qdisc = &mlxsw_sp_qdisc->qdiscs[band]; old_priomap = child_qdisc->prio_bitmap; child_qdisc->prio_bitmap = 0; @@ -993,6 +1104,9 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, return err; } } + + child_qdisc->tclass_num = tclass; + if (old_priomap != child_qdisc->prio_bitmap && child_qdisc->ops && child_qdisc->ops->clean_stats) { backlog = child_qdisc->stats_base.backlog; @@ -1002,7 +1116,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, } if (handle == qdisc_state->future_handle && - qdisc_state->future_fifos[tclass]) { + qdisc_state->future_fifos[band]) { err = mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC, child_qdisc, &mlxsw_sp_qdisc_ops_fifo, @@ -1013,7 +1127,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, } for (; band < IEEE_8021QAZ_MAX_TCS; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); - child_qdisc = &qdisc_state->tclass_qdiscs[tclass]; + child_qdisc = &mlxsw_sp_qdisc->qdiscs[band]; child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); mlxsw_sp_port_ets_set(mlxsw_sp_port, @@ -1034,8 +1148,9 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct tc_prio_qopt_offload_params *p = params; unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; - return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands, - zeroes, zeroes, p->priomap); + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, mlxsw_sp_qdisc, + handle, p->bands, zeroes, + zeroes, p->priomap); } static void @@ -1066,7 +1181,6 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; struct mlxsw_sp_qdisc *tc_qdisc; u64 tx_packets = 0; u64 tx_bytes = 0; @@ -1074,8 +1188,8 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, u64 drops = 0; int i; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - tc_qdisc = &qdisc_state->tclass_qdiscs[i]; + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) { + tc_qdisc = &mlxsw_sp_qdisc->qdiscs[i]; mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc, &tx_bytes, &tx_packets, &drops, &backlog); @@ -1112,6 +1226,18 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc->stats_base.backlog = 0; } +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_prio_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 parent) +{ + int child_index = TC_H_MIN(parent); + int band = child_index - 1; + + if (band < 0 || band >= mlxsw_sp_qdisc->num_classes) + return NULL; + return &mlxsw_sp_qdisc->qdiscs[band]; +} + static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .type = MLXSW_SP_QDISC_PRIO, .check_params = mlxsw_sp_qdisc_prio_check_params, @@ -1120,11 +1246,12 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .destroy = mlxsw_sp_qdisc_prio_destroy, .get_stats = mlxsw_sp_qdisc_get_prio_stats, .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, + .find_class = mlxsw_sp_qdisc_prio_find_class, + .num_classes = IEEE_8021QAZ_MAX_TCS, }; static int mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_ets_qopt_offload_replace_params *p = params; @@ -1139,8 +1266,9 @@ mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, { struct tc_ets_qopt_offload_replace_params *p = params; - return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands, - p->quanta, p->weights, p->priomap); + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, mlxsw_sp_qdisc, + handle, p->bands, p->quanta, + p->weights, p->priomap); } static void @@ -1158,7 +1286,7 @@ static int mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); } static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { @@ -1169,6 +1297,8 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { .destroy = mlxsw_sp_qdisc_ets_destroy, .get_stats = mlxsw_sp_qdisc_get_prio_stats, .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, + .find_class = mlxsw_sp_qdisc_prio_find_class, + .num_classes = IEEE_8021QAZ_MAX_TCS, }; /* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting @@ -1201,12 +1331,10 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u8 band, u32 child_handle) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); struct mlxsw_sp_qdisc *old_qdisc; - if (band < IEEE_8021QAZ_MAX_TCS && - qdisc_state->tclass_qdiscs[tclass_num].handle == child_handle) + if (band < mlxsw_sp_qdisc->num_classes && + mlxsw_sp_qdisc->qdiscs[band].handle == child_handle) return 0; if (!child_handle) { @@ -1224,8 +1352,10 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, if (old_qdisc) mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); - mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - &qdisc_state->tclass_qdiscs[tclass_num]); + mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, band); + if (!WARN_ON(!mlxsw_sp_qdisc)) + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + return -EOPNOTSUPP; } @@ -1238,8 +1368,8 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, p->band, p->child_handle); } -int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_prio_qopt_offload *p) +static int __mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; @@ -1253,8 +1383,7 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_prio, &p->replace_params); - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, - MLXSW_SP_QDISC_PRIO)) + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { @@ -1271,8 +1400,20 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, } } -int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_ets_qopt_offload *p) +int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_prio(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + +static int __mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; @@ -1286,8 +1427,7 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_ets, &p->replace_params); - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, - MLXSW_SP_QDISC_ETS)) + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { @@ -1305,6 +1445,18 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, } } +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_ets(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + struct mlxsw_sp_qevent_block { struct list_head binding_list; struct list_head mall_entry_list; @@ -1834,22 +1986,20 @@ int mlxsw_sp_setup_tc_block_qevent_early_drop(struct mlxsw_sp_port *mlxsw_sp_por int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_qdisc_state *qdisc_state; - int i; qdisc_state = kzalloc(sizeof(*qdisc_state), GFP_KERNEL); if (!qdisc_state) return -ENOMEM; + mutex_init(&qdisc_state->lock); qdisc_state->root_qdisc.prio_bitmap = 0xff; qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - qdisc_state->tclass_qdiscs[i].tclass_num = i; - mlxsw_sp_port->qdisc = qdisc_state; return 0; } void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port) { + mutex_destroy(&mlxsw_sp_port->qdisc->lock); kfree(mlxsw_sp_port->qdisc); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c1f05c17557d..eeccd586e781 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2916,7 +2916,8 @@ mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work * return; if (switchdev_work->event == SWITCHDEV_FDB_ADD_TO_DEVICE && - !switchdev_work->fdb_info.added_by_user) + (!switchdev_work->fdb_info.added_by_user || + switchdev_work->fdb_info.is_local)) return; if (!netif_running(dev)) @@ -2971,7 +2972,7 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true); if (err) diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 2feed6ce19d3..13eef6e9bd2d 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -193,11 +193,10 @@ static void ks8851_read_mac_addr(struct net_device *dev) static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np) { struct net_device *dev = ks->netdev; - const u8 *mac_addr; + int ret; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(dev->dev_addr, mac_addr); + ret = of_get_mac_address(np, dev->dev_addr); + if (!ret) { ks8851_write_mac_addr(dev); return; } diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e7ab5f3f73fd..dae10328c6cf 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -885,8 +885,8 @@ static int lan743x_mac_set_mtu(struct lan743x_adapter *adapter, int new_mtu) } mac_rx &= ~(MAC_RX_MAX_SIZE_MASK_); - mac_rx |= (((new_mtu + ETH_HLEN + 4) << MAC_RX_MAX_SIZE_SHIFT_) & - MAC_RX_MAX_SIZE_MASK_); + mac_rx |= (((new_mtu + ETH_HLEN + ETH_FCS_LEN) + << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_); lan743x_csr_write(adapter, MAC_RX, mac_rx); if (enabled) { @@ -1944,7 +1944,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index) struct sk_buff *skb; dma_addr_t dma_ptr; - buffer_length = netdev->mtu + ETH_HLEN + 4 + RX_HEAD_PADDING; + buffer_length = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + RX_HEAD_PADDING; descriptor = &rx->ring_cpu_ptr[index]; buffer_info = &rx->buffer_info[index]; @@ -2040,7 +2040,7 @@ lan743x_rx_trim_skb(struct sk_buff *skb, int frame_length) dev_kfree_skb_irq(skb); return NULL; } - frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 4); + frame_length = max_t(int, 0, frame_length - ETH_FCS_LEN); if (skb->len > frame_length) { skb->tail -= skb->len - frame_length; skb->len = frame_length; @@ -2771,7 +2771,6 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev, { struct lan743x_adapter *adapter = NULL; struct net_device *netdev = NULL; - const void *mac_addr; int ret = -ENODEV; netdev = devm_alloc_etherdev(&pdev->dev, @@ -2788,9 +2787,7 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev, NETIF_MSG_IFDOWN | NETIF_MSG_TX_QUEUED; netdev->max_mtu = LAN743X_MAX_FRAME_SIZE; - mac_addr = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(adapter->mac_address, mac_addr); + of_get_mac_address(pdev->dev.of_node, adapter->mac_address); ret = lan743x_pci_init(adapter, pdev); if (ret) diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig new file mode 100644 index 000000000000..e1ac0a5d808d --- /dev/null +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -0,0 +1,29 @@ +# +# Microsoft Azure network device configuration +# + +config NET_VENDOR_MICROSOFT + bool "Microsoft Network Devices" + default y + help + If you have a network (Ethernet) device belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip the + question about Microsoft network devices. If you say Y, you will be + asked for your specific device in the following question. + +if NET_VENDOR_MICROSOFT + +config MICROSOFT_MANA + tristate "Microsoft Azure Network Adapter (MANA) support" + depends on PCI_MSI && X86_64 + select PCI_HYPERV + help + This driver supports Microsoft Azure Network Adapter (MANA). + So far, the driver is only supported on X86_64. + + To compile this driver as a module, choose M here. + The module will be called mana. + +endif #NET_VENDOR_MICROSOFT diff --git a/drivers/net/ethernet/microsoft/Makefile b/drivers/net/ethernet/microsoft/Makefile new file mode 100644 index 000000000000..d2ddc218135f --- /dev/null +++ b/drivers/net/ethernet/microsoft/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Microsoft Azure network device driver. +# + +obj-$(CONFIG_MICROSOFT_MANA) += mana/ diff --git a/drivers/net/ethernet/microsoft/mana/Makefile b/drivers/net/ethernet/microsoft/mana/Makefile new file mode 100644 index 000000000000..0edd5bb685f3 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# Makefile for the Microsoft Azure Network Adapter driver + +obj-$(CONFIG_MICROSOFT_MANA) += mana.o +mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h new file mode 100644 index 000000000000..33e53d32e891 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/gdma.h @@ -0,0 +1,673 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _GDMA_H +#define _GDMA_H + +#include <linux/dma-mapping.h> +#include <linux/netdevice.h> + +#include "shm_channel.h" + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +enum gdma_request_type { + GDMA_VERIFY_VF_DRIVER_VERSION = 1, + GDMA_QUERY_MAX_RESOURCES = 2, + GDMA_LIST_DEVICES = 3, + GDMA_REGISTER_DEVICE = 4, + GDMA_DEREGISTER_DEVICE = 5, + GDMA_GENERATE_TEST_EQE = 10, + GDMA_CREATE_QUEUE = 12, + GDMA_DISABLE_QUEUE = 13, + GDMA_CREATE_DMA_REGION = 25, + GDMA_DMA_REGION_ADD_PAGES = 26, + GDMA_DESTROY_DMA_REGION = 27, +}; + +enum gdma_queue_type { + GDMA_INVALID_QUEUE, + GDMA_SQ, + GDMA_RQ, + GDMA_CQ, + GDMA_EQ, +}; + +enum gdma_work_request_flags { + GDMA_WR_NONE = 0, + GDMA_WR_OOB_IN_SGL = BIT(0), + GDMA_WR_PAD_BY_SGE0 = BIT(1), +}; + +enum gdma_eqe_type { + GDMA_EQE_COMPLETION = 3, + GDMA_EQE_TEST_EVENT = 64, + GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, + GDMA_EQE_HWC_INIT_DATA = 130, + GDMA_EQE_HWC_INIT_DONE = 131, +}; + +enum { + GDMA_DEVICE_NONE = 0, + GDMA_DEVICE_HWC = 1, + GDMA_DEVICE_MANA = 2, +}; + +struct gdma_resource { + /* Protect the bitmap */ + spinlock_t lock; + + /* The bitmap size in bits. */ + u32 size; + + /* The bitmap tracks the resources. */ + unsigned long *map; +}; + +union gdma_doorbell_entry { + u64 as_uint64; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 31; + u64 arm : 1; + } cq; + + struct { + u64 id : 24; + u64 wqe_cnt : 8; + u64 tail_ptr : 32; + } rq; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 32; + } sq; + + struct { + u64 id : 16; + u64 reserved : 16; + u64 tail_ptr : 31; + u64 arm : 1; + } eq; +}; /* HW DATA */ + +struct gdma_msg_hdr { + u32 hdr_type; + u32 msg_type; + u16 msg_version; + u16 hwc_msg_id; + u32 msg_size; +}; /* HW DATA */ + +struct gdma_dev_id { + union { + struct { + u16 type; + u16 instance; + }; + + u32 as_uint32; + }; +}; /* HW DATA */ + +struct gdma_req_hdr { + struct gdma_msg_hdr req; + struct gdma_msg_hdr resp; /* The expected response */ + struct gdma_dev_id dev_id; + u32 activity_id; +}; /* HW DATA */ + +struct gdma_resp_hdr { + struct gdma_msg_hdr response; + struct gdma_dev_id dev_id; + u32 activity_id; + u32 status; + u32 reserved; +}; /* HW DATA */ + +struct gdma_general_req { + struct gdma_req_hdr hdr; +}; /* HW DATA */ + +#define GDMA_MESSAGE_V1 1 + +struct gdma_general_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +#define GDMA_STANDARD_HEADER_TYPE 0 + +static inline void mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, u32 code, + u32 req_size, u32 resp_size) +{ + hdr->req.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->req.msg_type = code; + hdr->req.msg_version = GDMA_MESSAGE_V1; + hdr->req.msg_size = req_size; + + hdr->resp.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->resp.msg_type = code; + hdr->resp.msg_version = GDMA_MESSAGE_V1; + hdr->resp.msg_size = resp_size; +} + +/* The 16-byte struct is part of the GDMA work queue entry (WQE). */ +struct gdma_sge { + u64 address; + u32 mem_key; + u32 size; +}; /* HW DATA */ + +struct gdma_wqe_request { + struct gdma_sge *sgl; + u32 num_sge; + + u32 inline_oob_size; + const void *inline_oob_data; + + u32 flags; + u32 client_data_unit; +}; + +enum gdma_page_type { + GDMA_PAGE_TYPE_4K, +}; + +#define GDMA_INVALID_DMA_REGION 0 + +struct gdma_mem_info { + struct device *dev; + + dma_addr_t dma_handle; + void *virt_addr; + u64 length; + + /* Allocated by the PF driver */ + u64 gdma_region; +}; + +#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8 + +struct gdma_dev { + struct gdma_context *gdma_context; + + struct gdma_dev_id dev_id; + + u32 pdid; + u32 doorbell; + u32 gpa_mkey; + + /* GDMA driver specific pointer */ + void *driver_data; +}; + +#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE + +#define GDMA_CQE_SIZE 64 +#define GDMA_EQE_SIZE 16 +#define GDMA_MAX_SQE_SIZE 512 +#define GDMA_MAX_RQE_SIZE 256 + +#define GDMA_COMP_DATA_SIZE 0x3C + +#define GDMA_EVENT_DATA_SIZE 0xC + +/* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. */ +#define GDMA_WQE_BU_SIZE 32 + +#define INVALID_PDID UINT_MAX +#define INVALID_DOORBELL UINT_MAX +#define INVALID_MEM_KEY UINT_MAX +#define INVALID_QUEUE_ID UINT_MAX +#define INVALID_PCI_MSIX_INDEX UINT_MAX + +struct gdma_comp { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + u32 wq_num; + bool is_sq; +}; + +struct gdma_event { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u8 type; +}; + +struct gdma_queue; + +#define CQE_POLLING_BUFFER 512 +struct mana_eq { + struct gdma_queue *eq; + struct gdma_comp cqe_poll[CQE_POLLING_BUFFER]; +}; + +typedef void gdma_eq_callback(void *context, struct gdma_queue *q, + struct gdma_event *e); + +typedef void gdma_cq_callback(void *context, struct gdma_queue *q); + +/* The 'head' is the producer index. For SQ/RQ, when the driver posts a WQE + * (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the + * driver increases the 'head' in BUs rather than in bytes, and notifies + * the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track + * the HW head, and increases the 'head' by 1 for every processed EQE/CQE. + * + * The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is + * processed, the driver increases the 'tail' to indicate that WQEs have + * been consumed by the HW, so the driver can post new WQEs into the SQ/RQ. + * + * The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures + * that the EQ/CQ is big enough so they can't overflow, and the driver uses + * the owner bits mechanism to detect if the queue has become empty. + */ +struct gdma_queue { + struct gdma_dev *gdma_dev; + + enum gdma_queue_type type; + u32 id; + + struct gdma_mem_info mem_info; + + void *queue_mem_ptr; + u32 queue_size; + + bool monitor_avl_buf; + + u32 head; + u32 tail; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + bool disable_needed; + + gdma_eq_callback *callback; + void *context; + + unsigned int msix_index; + + u32 log2_throttle_limit; + + /* NAPI data */ + struct napi_struct napi; + int work_done; + int budget; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent; /* For CQ/EQ relationship */ + } cq; + }; +}; + +struct gdma_queue_spec { + enum gdma_queue_type type; + bool monitor_avl_buf; + unsigned int queue_size; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + gdma_eq_callback *callback; + void *context; + + unsigned long log2_throttle_limit; + + /* Only used by the MANA device. */ + struct net_device *ndev; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent_eq; + + } cq; + }; +}; + +struct gdma_irq_context { + void (*handler)(void *arg); + void *arg; +}; + +struct gdma_context { + struct device *dev; + + /* Per-vPort max number of queues */ + unsigned int max_num_queues; + unsigned int max_num_msix; + unsigned int num_msix_usable; + struct gdma_resource msix_resource; + struct gdma_irq_context *irq_contexts; + + /* This maps a CQ index to the queue structure. */ + unsigned int max_num_cqs; + struct gdma_queue **cq_table; + + /* Protect eq_test_event and test_event_eq_id */ + struct mutex eq_test_event_mutex; + struct completion eq_test_event; + u32 test_event_eq_id; + + void __iomem *bar0_va; + void __iomem *shm_base; + void __iomem *db_page_base; + u32 db_page_size; + + /* Shared memory chanenl (used to bootstrap HWC) */ + struct shm_channel shm_channel; + + /* Hardware communication channel (HWC) */ + struct gdma_dev hwc; + + /* Azure network adapter */ + struct gdma_dev mana; +}; + +#define MAX_NUM_GDMA_DEVICES 4 + +static inline bool mana_gd_is_mana(struct gdma_dev *gd) +{ + return gd->dev_id.type == GDMA_DEVICE_MANA; +} + +static inline bool mana_gd_is_hwc(struct gdma_dev *gd) +{ + return gd->dev_id.type == GDMA_DEVICE_HWC; +} + +u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset); +u32 mana_gd_wq_avail_space(struct gdma_queue *wq); + +int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq); + +int mana_gd_create_hwc_queue(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +int mana_gd_create_mana_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue); + +int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe); + +void mana_gd_arm_cq(struct gdma_queue *cq); + +struct gdma_wqe { + u32 reserved :24; + u32 last_vbytes :8; + + union { + u32 flags; + + struct { + u32 num_sge :8; + u32 inline_oob_size_div4:3; + u32 client_oob_in_sgl :1; + u32 reserved1 :4; + u32 client_data_unit :14; + u32 reserved2 :2; + }; + }; +}; /* HW DATA */ + +#define INLINE_OOB_SMALL_SIZE 8 +#define INLINE_OOB_LARGE_SIZE 24 + +#define MAX_TX_WQE_SIZE 512 +#define MAX_RX_WQE_SIZE 256 + +struct gdma_cqe { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + + union { + u32 as_uint32; + + struct { + u32 wq_num : 24; + u32 is_sq : 1; + u32 reserved : 4; + u32 owner_bits : 3; + }; + } cqe_info; +}; /* HW DATA */ + +#define GDMA_CQE_OWNER_BITS 3 + +#define GDMA_CQE_OWNER_MASK ((1 << GDMA_CQE_OWNER_BITS) - 1) + +#define SET_ARM_BIT 1 + +#define GDMA_EQE_OWNER_BITS 3 + +union gdma_eqe_info { + u32 as_uint32; + + struct { + u32 type : 8; + u32 reserved1 : 8; + u32 client_id : 2; + u32 reserved2 : 11; + u32 owner_bits : 3; + }; +}; /* HW DATA */ + +#define GDMA_EQE_OWNER_MASK ((1 << GDMA_EQE_OWNER_BITS) - 1) +#define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries)) + +struct gdma_eqe { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u32 eqe_info; +}; /* HW DATA */ + +#define GDMA_REG_DB_PAGE_OFFSET 8 +#define GDMA_REG_DB_PAGE_SIZE 0x10 +#define GDMA_REG_SHM_OFFSET 0x18 + +struct gdma_posted_wqe_info { + u32 wqe_size_in_bu; +}; + +/* GDMA_GENERATE_TEST_EQE */ +struct gdma_generate_test_event_req { + struct gdma_req_hdr hdr; + u32 queue_index; +}; /* HW DATA */ + +/* GDMA_VERIFY_VF_DRIVER_VERSION */ +enum { + GDMA_PROTOCOL_V1 = 1, + GDMA_PROTOCOL_FIRST = GDMA_PROTOCOL_V1, + GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1, +}; + +struct gdma_verify_ver_req { + struct gdma_req_hdr hdr; + + /* Mandatory fields required for protocol establishment */ + u64 protocol_ver_min; + u64 protocol_ver_max; + u64 drv_cap_flags1; + u64 drv_cap_flags2; + u64 drv_cap_flags3; + u64 drv_cap_flags4; + + /* Advisory fields */ + u64 drv_ver; + u32 os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */ + u32 reserved; + u32 os_ver_major; + u32 os_ver_minor; + u32 os_ver_build; + u32 os_ver_platform; + u64 reserved_2; + u8 os_ver_str1[128]; + u8 os_ver_str2[128]; + u8 os_ver_str3[128]; + u8 os_ver_str4[128]; +}; /* HW DATA */ + +struct gdma_verify_ver_resp { + struct gdma_resp_hdr hdr; + u64 gdma_protocol_ver; + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + u64 pf_cap_flags4; +}; /* HW DATA */ + +/* GDMA_QUERY_MAX_RESOURCES */ +struct gdma_query_max_resources_resp { + struct gdma_resp_hdr hdr; + u32 status; + u32 max_sq; + u32 max_rq; + u32 max_cq; + u32 max_eq; + u32 max_db; + u32 max_mst; + u32 max_cq_mod_ctx; + u32 max_mod_cq; + u32 max_msix; +}; /* HW DATA */ + +/* GDMA_LIST_DEVICES */ +struct gdma_list_devices_resp { + struct gdma_resp_hdr hdr; + u32 num_of_devs; + u32 reserved; + struct gdma_dev_id devs[64]; +}; /* HW DATA */ + +/* GDMA_REGISTER_DEVICE */ +struct gdma_register_device_resp { + struct gdma_resp_hdr hdr; + u32 pdid; + u32 gpa_mkey; + u32 db_id; +}; /* HW DATA */ + +/* GDMA_CREATE_QUEUE */ +struct gdma_create_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 reserved1; + u32 pdid; + u32 doolbell_id; + u64 gdma_region; + u32 reserved2; + u32 queue_size; + u32 log2_throttle_limit; + u32 eq_pci_msix_index; + u32 cq_mod_ctx_id; + u32 cq_parent_eq_id; + u8 rq_drop_on_overrun; + u8 rq_err_on_wqe_overflow; + u8 rq_chain_rec_wqes; + u8 sq_hw_db; + u32 reserved3; +}; /* HW DATA */ + +struct gdma_create_queue_resp { + struct gdma_resp_hdr hdr; + u32 queue_index; +}; /* HW DATA */ + +/* GDMA_DISABLE_QUEUE */ +struct gdma_disable_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 queue_index; + u32 alloc_res_id_on_creation; +}; /* HW DATA */ + +/* GDMA_CREATE_DMA_REGION */ +struct gdma_create_dma_region_req { + struct gdma_req_hdr hdr; + + /* The total size of the DMA region */ + u64 length; + + /* The offset in the first page */ + u32 offset_in_page; + + /* enum gdma_page_type */ + u32 gdma_page_type; + + /* The total number of pages */ + u32 page_count; + + /* If page_addr_list_len is smaller than page_count, + * the remaining page addresses will be added via the + * message GDMA_DMA_REGION_ADD_PAGES. + */ + u32 page_addr_list_len; + u64 page_addr_list[]; +}; /* HW DATA */ + +struct gdma_create_dma_region_resp { + struct gdma_resp_hdr hdr; + u64 gdma_region; +}; /* HW DATA */ + +/* GDMA_DMA_REGION_ADD_PAGES */ +struct gdma_dma_region_add_pages_req { + struct gdma_req_hdr hdr; + + u64 gdma_region; + + u32 page_addr_list_len; + u32 reserved3; + + u64 page_addr_list[]; +}; /* HW DATA */ + +/* GDMA_DESTROY_DMA_REGION */ +struct gdma_destroy_dma_region_req { + struct gdma_req_hdr hdr; + + u64 gdma_region; +}; /* HW DATA */ + +int mana_gd_verify_vf_version(struct pci_dev *pdev); + +int mana_gd_register_device(struct gdma_dev *gd); +int mana_gd_deregister_device(struct gdma_dev *gd); + +int mana_gd_post_work_request(struct gdma_queue *wq, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info); + +int mana_gd_post_and_ring(struct gdma_queue *queue, + const struct gdma_wqe_request *wqe, + struct gdma_posted_wqe_info *wqe_info); + +int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r); +void mana_gd_free_res_map(struct gdma_resource *r); + +void mana_gd_wq_ring_doorbell(struct gdma_context *gc, + struct gdma_queue *queue); + +int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, + struct gdma_mem_info *gmi); + +void mana_gd_free_memory(struct gdma_mem_info *gmi); + +int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + u32 resp_len, void *resp); +#endif /* _GDMA_H */ diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c new file mode 100644 index 000000000000..2f87bf90f8ec --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -0,0 +1,1415 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "mana.h" + +static u32 mana_gd_r32(struct gdma_context *g, u64 offset) +{ + return readl(g->bar0_va + offset); +} + +static u64 mana_gd_r64(struct gdma_context *g, u64 offset) +{ + return readq(g->bar0_va + offset); +} + +static void mana_gd_init_registers(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; + + gc->db_page_base = gc->bar0_va + + mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); + + gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); +} + +static int mana_gd_query_max_resources(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_query_max_resources_resp resp = {}; + struct gdma_general_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES, + sizeof(req), sizeof(resp)); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to query resource info: %d, 0x%x\n", + err, resp.hdr.status); + return err ? err : -EPROTO; + } + + if (gc->num_msix_usable > resp.max_msix) + gc->num_msix_usable = resp.max_msix; + + if (gc->num_msix_usable <= 1) + return -ENOSPC; + + gc->max_num_queues = num_online_cpus(); + if (gc->max_num_queues > MANA_MAX_NUM_QUEUES) + gc->max_num_queues = MANA_MAX_NUM_QUEUES; + + if (gc->max_num_queues > resp.max_eq) + gc->max_num_queues = resp.max_eq; + + if (gc->max_num_queues > resp.max_cq) + gc->max_num_queues = resp.max_cq; + + if (gc->max_num_queues > resp.max_sq) + gc->max_num_queues = resp.max_sq; + + if (gc->max_num_queues > resp.max_rq) + gc->max_num_queues = resp.max_rq; + + return 0; +} + +static int mana_gd_detect_devices(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_list_devices_resp resp = {}; + struct gdma_general_req req = {}; + struct gdma_dev_id dev; + u32 i, max_num_devs; + u16 dev_type; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), + sizeof(resp)); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to detect devices: %d, 0x%x\n", err, + resp.hdr.status); + return err ? err : -EPROTO; + } + + max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); + + for (i = 0; i < max_num_devs; i++) { + dev = resp.devs[i]; + dev_type = dev.type; + + /* HWC is already detected in mana_hwc_create_channel(). */ + if (dev_type == GDMA_DEVICE_HWC) + continue; + + if (dev_type == GDMA_DEVICE_MANA) { + gc->mana.gdma_context = gc; + gc->mana.dev_id = dev; + } + } + + return gc->mana.dev_id.type == 0 ? -ENODEV : 0; +} + +int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + u32 resp_len, void *resp) +{ + struct hw_channel_context *hwc = gc->hwc.driver_data; + + return mana_hwc_send_request(hwc, req_len, req, resp_len, resp); +} + +int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, + struct gdma_mem_info *gmi) +{ + dma_addr_t dma_handle; + void *buf; + + if (length < PAGE_SIZE || !is_power_of_2(length)) + return -EINVAL; + + gmi->dev = gc->dev; + buf = dma_alloc_coherent(gmi->dev, length, &dma_handle, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + gmi->dma_handle = dma_handle; + gmi->virt_addr = buf; + gmi->length = length; + + return 0; +} + +void mana_gd_free_memory(struct gdma_mem_info *gmi) +{ + dma_free_coherent(gmi->dev, gmi->length, gmi->virt_addr, + gmi->dma_handle); +} + +static int mana_gd_create_hw_eq(struct gdma_context *gc, + struct gdma_queue *queue) +{ + struct gdma_create_queue_resp resp = {}; + struct gdma_create_queue_req req = {}; + int err; + + if (queue->type != GDMA_EQ) + return -EINVAL; + + mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_QUEUE, + sizeof(req), sizeof(resp)); + + req.hdr.dev_id = queue->gdma_dev->dev_id; + req.type = queue->type; + req.pdid = queue->gdma_dev->pdid; + req.doolbell_id = queue->gdma_dev->doorbell; + req.gdma_region = queue->mem_info.gdma_region; + req.queue_size = queue->queue_size; + req.log2_throttle_limit = queue->eq.log2_throttle_limit; + req.eq_pci_msix_index = queue->eq.msix_index; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to create queue: %d, 0x%x\n", err, + resp.hdr.status); + return err ? err : -EPROTO; + } + + queue->id = resp.queue_index; + queue->eq.disable_needed = true; + queue->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + return 0; +} + +static int mana_gd_disable_queue(struct gdma_queue *queue) +{ + struct gdma_context *gc = queue->gdma_dev->gdma_context; + struct gdma_disable_queue_req req = {}; + struct gdma_general_resp resp = {}; + int err; + + WARN_ON(queue->type != GDMA_EQ); + + mana_gd_init_req_hdr(&req.hdr, GDMA_DISABLE_QUEUE, + sizeof(req), sizeof(resp)); + + req.hdr.dev_id = queue->gdma_dev->dev_id; + req.type = queue->type; + req.queue_index = queue->id; + req.alloc_res_id_on_creation = 1; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, + resp.hdr.status); + return err ? err : -EPROTO; + } + + return 0; +} + +#define DOORBELL_OFFSET_SQ 0x0 +#define DOORBELL_OFFSET_RQ 0x400 +#define DOORBELL_OFFSET_CQ 0x800 +#define DOORBELL_OFFSET_EQ 0xFF8 + +static void mana_gd_ring_doorbell(struct gdma_context *gc, u32 db_index, + enum gdma_queue_type q_type, u32 qid, + u32 tail_ptr, u8 num_req) +{ + void __iomem *addr = gc->db_page_base + gc->db_page_size * db_index; + union gdma_doorbell_entry e = {}; + + switch (q_type) { + case GDMA_EQ: + e.eq.id = qid; + e.eq.tail_ptr = tail_ptr; + e.eq.arm = num_req; + + addr += DOORBELL_OFFSET_EQ; + break; + + case GDMA_CQ: + e.cq.id = qid; + e.cq.tail_ptr = tail_ptr; + e.cq.arm = num_req; + + addr += DOORBELL_OFFSET_CQ; + break; + + case GDMA_RQ: + e.rq.id = qid; + e.rq.tail_ptr = tail_ptr; + e.rq.wqe_cnt = num_req; + + addr += DOORBELL_OFFSET_RQ; + break; + + case GDMA_SQ: + e.sq.id = qid; + e.sq.tail_ptr = tail_ptr; + + addr += DOORBELL_OFFSET_SQ; + break; + + default: + WARN_ON(1); + return; + } + + /* Ensure all writes are done before ring doorbell */ + wmb(); + + writeq(e.as_uint64, addr); +} + +void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue) +{ + mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type, + queue->id, queue->head * GDMA_WQE_BU_SIZE, 1); +} + +void mana_gd_arm_cq(struct gdma_queue *cq) +{ + struct gdma_context *gc = cq->gdma_dev->gdma_context; + + u32 num_cqe = cq->queue_size / GDMA_CQE_SIZE; + + u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS); + + mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id, + head, SET_ARM_BIT); +} + +static void mana_gd_process_eqe(struct gdma_queue *eq) +{ + u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE); + struct gdma_context *gc = eq->gdma_dev->gdma_context; + struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr; + union gdma_eqe_info eqe_info; + enum gdma_eqe_type type; + struct gdma_event event; + struct gdma_queue *cq; + struct gdma_eqe *eqe; + u32 cq_id; + + eqe = &eq_eqe_ptr[head]; + eqe_info.as_uint32 = eqe->eqe_info; + type = eqe_info.type; + + switch (type) { + case GDMA_EQE_COMPLETION: + cq_id = eqe->details[0] & 0xFFFFFF; + if (WARN_ON_ONCE(cq_id >= gc->max_num_cqs)) + break; + + cq = gc->cq_table[cq_id]; + if (WARN_ON_ONCE(!cq || cq->type != GDMA_CQ || cq->id != cq_id)) + break; + + if (cq->cq.callback) + cq->cq.callback(cq->cq.context, cq); + + break; + + case GDMA_EQE_TEST_EVENT: + gc->test_event_eq_id = eq->id; + complete(&gc->eq_test_event); + break; + + case GDMA_EQE_HWC_INIT_EQ_ID_DB: + case GDMA_EQE_HWC_INIT_DATA: + case GDMA_EQE_HWC_INIT_DONE: + if (!eq->eq.callback) + break; + + event.type = type; + memcpy(&event.details, &eqe->details, GDMA_EVENT_DATA_SIZE); + eq->eq.callback(eq->eq.context, eq, &event); + break; + + default: + break; + } +} + +static void mana_gd_process_eq_events(void *arg) +{ + u32 owner_bits, new_bits, old_bits; + union gdma_eqe_info eqe_info; + struct gdma_eqe *eq_eqe_ptr; + struct gdma_queue *eq = arg; + struct gdma_context *gc; + struct gdma_eqe *eqe; + unsigned int arm_bit; + u32 head, num_eqe; + int i; + + gc = eq->gdma_dev->gdma_context; + + num_eqe = eq->queue_size / GDMA_EQE_SIZE; + eq_eqe_ptr = eq->queue_mem_ptr; + + /* Process up to 5 EQEs at a time, and update the HW head. */ + for (i = 0; i < 5; i++) { + eqe = &eq_eqe_ptr[eq->head % num_eqe]; + eqe_info.as_uint32 = eqe->eqe_info; + owner_bits = eqe_info.owner_bits; + + old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK; + /* No more entries */ + if (owner_bits == old_bits) + break; + + new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK; + if (owner_bits != new_bits) { + dev_err(gc->dev, "EQ %d: overflow detected\n", eq->id); + break; + } + + mana_gd_process_eqe(eq); + + eq->head++; + } + + /* Always rearm the EQ for HWC. For MANA, rearm it when NAPI is done. */ + if (mana_gd_is_hwc(eq->gdma_dev)) { + arm_bit = SET_ARM_BIT; + } else if (eq->eq.work_done < eq->eq.budget && + napi_complete_done(&eq->eq.napi, eq->eq.work_done)) { + arm_bit = SET_ARM_BIT; + } else { + arm_bit = 0; + } + + head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS); + + mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id, + head, arm_bit); +} + +static int mana_poll(struct napi_struct *napi, int budget) +{ + struct gdma_queue *eq = container_of(napi, struct gdma_queue, eq.napi); + + eq->eq.work_done = 0; + eq->eq.budget = budget; + + mana_gd_process_eq_events(eq); + + return min(eq->eq.work_done, budget); +} + +static void mana_gd_schedule_napi(void *arg) +{ + struct gdma_queue *eq = arg; + struct napi_struct *napi; + + napi = &eq->eq.napi; + napi_schedule_irqoff(napi); +} + +static int mana_gd_register_irq(struct gdma_queue *queue, + const struct gdma_queue_spec *spec) +{ + struct gdma_dev *gd = queue->gdma_dev; + bool is_mana = mana_gd_is_mana(gd); + struct gdma_irq_context *gic; + struct gdma_context *gc; + struct gdma_resource *r; + unsigned int msi_index; + unsigned long flags; + int err; + + gc = gd->gdma_context; + r = &gc->msix_resource; + + spin_lock_irqsave(&r->lock, flags); + + msi_index = find_first_zero_bit(r->map, r->size); + if (msi_index >= r->size) { + err = -ENOSPC; + } else { + bitmap_set(r->map, msi_index, 1); + queue->eq.msix_index = msi_index; + err = 0; + } + + spin_unlock_irqrestore(&r->lock, flags); + + if (err) + return err; + + WARN_ON(msi_index >= gc->num_msix_usable); + + gic = &gc->irq_contexts[msi_index]; + + if (is_mana) { + netif_napi_add(spec->eq.ndev, &queue->eq.napi, mana_poll, + NAPI_POLL_WEIGHT); + napi_enable(&queue->eq.napi); + } + + WARN_ON(gic->handler || gic->arg); + + gic->arg = queue; + + if (is_mana) + gic->handler = mana_gd_schedule_napi; + else + gic->handler = mana_gd_process_eq_events; + + return 0; +} + +static void mana_gd_deregiser_irq(struct gdma_queue *queue) +{ + struct gdma_dev *gd = queue->gdma_dev; + struct gdma_irq_context *gic; + struct gdma_context *gc; + struct gdma_resource *r; + unsigned int msix_index; + unsigned long flags; + + gc = gd->gdma_context; + r = &gc->msix_resource; + + /* At most num_online_cpus() + 1 interrupts are used. */ + msix_index = queue->eq.msix_index; + if (WARN_ON(msix_index >= gc->num_msix_usable)) + return; + + gic = &gc->irq_contexts[msix_index]; + gic->handler = NULL; + gic->arg = NULL; + + spin_lock_irqsave(&r->lock, flags); + bitmap_clear(r->map, msix_index, 1); + spin_unlock_irqrestore(&r->lock, flags); + + queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; +} + +int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq) +{ + struct gdma_generate_test_event_req req = {}; + struct gdma_general_resp resp = {}; + struct device *dev = gc->dev; + int err; + + mutex_lock(&gc->eq_test_event_mutex); + + init_completion(&gc->eq_test_event); + gc->test_event_eq_id = INVALID_QUEUE_ID; + + mana_gd_init_req_hdr(&req.hdr, GDMA_GENERATE_TEST_EQE, + sizeof(req), sizeof(resp)); + + req.hdr.dev_id = eq->gdma_dev->dev_id; + req.queue_index = eq->id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + dev_err(dev, "test_eq failed: %d\n", err); + goto out; + } + + err = -EPROTO; + + if (resp.hdr.status) { + dev_err(dev, "test_eq failed: 0x%x\n", resp.hdr.status); + goto out; + } + + if (!wait_for_completion_timeout(&gc->eq_test_event, 30 * HZ)) { + dev_err(dev, "test_eq timed out on queue %d\n", eq->id); + goto out; + } + + if (eq->id != gc->test_event_eq_id) { + dev_err(dev, "test_eq got an event on wrong queue %d (%d)\n", + gc->test_event_eq_id, eq->id); + goto out; + } + + err = 0; +out: + mutex_unlock(&gc->eq_test_event_mutex); + return err; +} + +static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets, + struct gdma_queue *queue) +{ + int err; + + if (flush_evenets) { + err = mana_gd_test_eq(gc, queue); + if (err) + dev_warn(gc->dev, "Failed to flush EQ: %d\n", err); + } + + mana_gd_deregiser_irq(queue); + + if (mana_gd_is_mana(queue->gdma_dev)) { + napi_disable(&queue->eq.napi); + netif_napi_del(&queue->eq.napi); + } + + if (queue->eq.disable_needed) + mana_gd_disable_queue(queue); +} + +static int mana_gd_create_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + bool create_hwq, struct gdma_queue *queue) +{ + struct gdma_context *gc = gd->gdma_context; + struct device *dev = gc->dev; + u32 log2_num_entries; + int err; + + queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; + + log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE); + + if (spec->eq.log2_throttle_limit > log2_num_entries) { + dev_err(dev, "EQ throttling limit (%lu) > maximum EQE (%u)\n", + spec->eq.log2_throttle_limit, log2_num_entries); + return -EINVAL; + } + + err = mana_gd_register_irq(queue, spec); + if (err) { + dev_err(dev, "Failed to register irq: %d\n", err); + return err; + } + + queue->eq.callback = spec->eq.callback; + queue->eq.context = spec->eq.context; + queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); + queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1; + + if (create_hwq) { + err = mana_gd_create_hw_eq(gc, queue); + if (err) + goto out; + + err = mana_gd_test_eq(gc, queue); + if (err) + goto out; + } + + return 0; +out: + dev_err(dev, "Failed to create EQ: %d\n", err); + mana_gd_destroy_eq(gc, false, queue); + return err; +} + +static void mana_gd_create_cq(const struct gdma_queue_spec *spec, + struct gdma_queue *queue) +{ + u32 log2_num_entries = ilog2(spec->queue_size / GDMA_CQE_SIZE); + + queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); + queue->cq.parent = spec->cq.parent_eq; + queue->cq.context = spec->cq.context; + queue->cq.callback = spec->cq.callback; +} + +static void mana_gd_destroy_cq(struct gdma_context *gc, + struct gdma_queue *queue) +{ + u32 id = queue->id; + + if (id >= gc->max_num_cqs) + return; + + if (!gc->cq_table[id]) + return; + + gc->cq_table[id] = NULL; +} + +int mana_gd_create_hwc_queue(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_mem_info *gmi; + struct gdma_queue *queue; + int err; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + gmi = &queue->mem_info; + err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); + if (err) + goto free_q; + + queue->head = 0; + queue->tail = 0; + queue->queue_mem_ptr = gmi->virt_addr; + queue->queue_size = spec->queue_size; + queue->monitor_avl_buf = spec->monitor_avl_buf; + queue->type = spec->type; + queue->gdma_dev = gd; + + if (spec->type == GDMA_EQ) + err = mana_gd_create_eq(gd, spec, false, queue); + else if (spec->type == GDMA_CQ) + mana_gd_create_cq(spec, queue); + + if (err) + goto out; + + *queue_ptr = queue; + return 0; +out: + mana_gd_free_memory(gmi); +free_q: + kfree(queue); + return err; +} + +static void mana_gd_destroy_dma_region(struct gdma_context *gc, u64 gdma_region) +{ + struct gdma_destroy_dma_region_req req = {}; + struct gdma_general_resp resp = {}; + int err; + + if (gdma_region == GDMA_INVALID_DMA_REGION) + return; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DMA_REGION, sizeof(req), + sizeof(resp)); + req.gdma_region = gdma_region; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) + dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", + err, resp.hdr.status); +} + +static int mana_gd_create_dma_region(struct gdma_dev *gd, + struct gdma_mem_info *gmi) +{ + unsigned int num_page = gmi->length / PAGE_SIZE; + struct gdma_create_dma_region_req *req = NULL; + struct gdma_create_dma_region_resp resp = {}; + struct gdma_context *gc = gd->gdma_context; + struct hw_channel_context *hwc; + u32 length = gmi->length; + u32 req_msg_size; + int err; + int i; + + if (length < PAGE_SIZE || !is_power_of_2(length)) + return -EINVAL; + + if (offset_in_page(gmi->virt_addr) != 0) + return -EINVAL; + + hwc = gc->hwc.driver_data; + req_msg_size = sizeof(*req) + num_page * sizeof(u64); + if (req_msg_size > hwc->max_req_msg_size) + return -EINVAL; + + req = kzalloc(req_msg_size, GFP_KERNEL); + if (!req) + return -ENOMEM; + + mana_gd_init_req_hdr(&req->hdr, GDMA_CREATE_DMA_REGION, + req_msg_size, sizeof(resp)); + req->length = length; + req->offset_in_page = 0; + req->gdma_page_type = GDMA_PAGE_TYPE_4K; + req->page_count = num_page; + req->page_addr_list_len = num_page; + + for (i = 0; i < num_page; i++) + req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; + + err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); + if (err) + goto out; + + if (resp.hdr.status || resp.gdma_region == GDMA_INVALID_DMA_REGION) { + dev_err(gc->dev, "Failed to create DMA region: 0x%x\n", + resp.hdr.status); + err = -EPROTO; + goto out; + } + + gmi->gdma_region = resp.gdma_region; +out: + kfree(req); + return err; +} + +int mana_gd_create_mana_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_mem_info *gmi; + struct gdma_queue *queue; + int err; + + if (spec->type != GDMA_EQ) + return -EINVAL; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + gmi = &queue->mem_info; + err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); + if (err) + goto free_q; + + err = mana_gd_create_dma_region(gd, gmi); + if (err) + goto out; + + queue->head = 0; + queue->tail = 0; + queue->queue_mem_ptr = gmi->virt_addr; + queue->queue_size = spec->queue_size; + queue->monitor_avl_buf = spec->monitor_avl_buf; + queue->type = spec->type; + queue->gdma_dev = gd; + + err = mana_gd_create_eq(gd, spec, true, queue); + if (err) + goto out; + + *queue_ptr = queue; + return 0; +out: + mana_gd_free_memory(gmi); +free_q: + kfree(queue); + return err; +} + +int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_mem_info *gmi; + struct gdma_queue *queue; + int err; + + if (spec->type != GDMA_CQ && spec->type != GDMA_SQ && + spec->type != GDMA_RQ) + return -EINVAL; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + gmi = &queue->mem_info; + err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); + if (err) + goto free_q; + + err = mana_gd_create_dma_region(gd, gmi); + if (err) + goto out; + + queue->head = 0; + queue->tail = 0; + queue->queue_mem_ptr = gmi->virt_addr; + queue->queue_size = spec->queue_size; + queue->monitor_avl_buf = spec->monitor_avl_buf; + queue->type = spec->type; + queue->gdma_dev = gd; + + if (spec->type == GDMA_CQ) + mana_gd_create_cq(spec, queue); + + *queue_ptr = queue; + return 0; +out: + mana_gd_free_memory(gmi); +free_q: + kfree(queue); + return err; +} + +void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue) +{ + struct gdma_mem_info *gmi = &queue->mem_info; + + switch (queue->type) { + case GDMA_EQ: + mana_gd_destroy_eq(gc, queue->eq.disable_needed, queue); + break; + + case GDMA_CQ: + mana_gd_destroy_cq(gc, queue); + break; + + case GDMA_RQ: + break; + + case GDMA_SQ: + break; + + default: + dev_err(gc->dev, "Can't destroy unknown queue: type=%d\n", + queue->type); + return; + } + + mana_gd_destroy_dma_region(gc, gmi->gdma_region); + mana_gd_free_memory(gmi); + kfree(queue); +} + +int mana_gd_verify_vf_version(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_verify_ver_resp resp = {}; + struct gdma_verify_ver_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_VERIFY_VF_DRIVER_VERSION, + sizeof(req), sizeof(resp)); + + req.protocol_ver_min = GDMA_PROTOCOL_FIRST; + req.protocol_ver_max = GDMA_PROTOCOL_LAST; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n", + err, resp.hdr.status); + return err ? err : -EPROTO; + } + + return 0; +} + +int mana_gd_register_device(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_register_device_resp resp = {}; + struct gdma_general_req req = {}; + int err; + + gd->pdid = INVALID_PDID; + gd->doorbell = INVALID_DOORBELL; + gd->gpa_mkey = INVALID_MEM_KEY; + + mana_gd_init_req_hdr(&req.hdr, GDMA_REGISTER_DEVICE, sizeof(req), + sizeof(resp)); + + req.hdr.dev_id = gd->dev_id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "gdma_register_device_resp failed: %d, 0x%x\n", + err, resp.hdr.status); + return err ? err : -EPROTO; + } + + gd->pdid = resp.pdid; + gd->gpa_mkey = resp.gpa_mkey; + gd->doorbell = resp.db_id; + + return 0; +} + +int mana_gd_deregister_device(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_general_resp resp = {}; + struct gdma_general_req req = {}; + int err; + + if (gd->pdid == INVALID_PDID) + return -EINVAL; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DEREGISTER_DEVICE, sizeof(req), + sizeof(resp)); + + req.hdr.dev_id = gd->dev_id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", + err, resp.hdr.status); + if (!err) + err = -EPROTO; + } + + gd->pdid = INVALID_PDID; + gd->doorbell = INVALID_DOORBELL; + gd->gpa_mkey = INVALID_MEM_KEY; + + return err; +} + +u32 mana_gd_wq_avail_space(struct gdma_queue *wq) +{ + u32 used_space = (wq->head - wq->tail) * GDMA_WQE_BU_SIZE; + u32 wq_size = wq->queue_size; + + WARN_ON_ONCE(used_space > wq_size); + + return wq_size - used_space; +} + +u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset) +{ + u32 offset = (wqe_offset * GDMA_WQE_BU_SIZE) & (wq->queue_size - 1); + + WARN_ON_ONCE((offset + GDMA_WQE_BU_SIZE) > wq->queue_size); + + return wq->queue_mem_ptr + offset; +} + +static u32 mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req, + enum gdma_queue_type q_type, + u32 client_oob_size, u32 sgl_data_size, + u8 *wqe_ptr) +{ + bool oob_in_sgl = !!(wqe_req->flags & GDMA_WR_OOB_IN_SGL); + bool pad_data = !!(wqe_req->flags & GDMA_WR_PAD_BY_SGE0); + struct gdma_wqe *header = (struct gdma_wqe *)wqe_ptr; + u8 *ptr; + + memset(header, 0, sizeof(struct gdma_wqe)); + header->num_sge = wqe_req->num_sge; + header->inline_oob_size_div4 = client_oob_size / sizeof(u32); + + if (oob_in_sgl) { + WARN_ON_ONCE(!pad_data || wqe_req->num_sge < 2); + + header->client_oob_in_sgl = 1; + + if (pad_data) + header->last_vbytes = wqe_req->sgl[0].size; + } + + if (q_type == GDMA_SQ) + header->client_data_unit = wqe_req->client_data_unit; + + /* The size of gdma_wqe + client_oob_size must be less than or equal + * to one Basic Unit (i.e. 32 bytes), so the pointer can't go beyond + * the queue memory buffer boundary. + */ + ptr = wqe_ptr + sizeof(header); + + if (wqe_req->inline_oob_data && wqe_req->inline_oob_size > 0) { + memcpy(ptr, wqe_req->inline_oob_data, wqe_req->inline_oob_size); + + if (client_oob_size > wqe_req->inline_oob_size) + memset(ptr + wqe_req->inline_oob_size, 0, + client_oob_size - wqe_req->inline_oob_size); + } + + return sizeof(header) + client_oob_size; +} + +static void mana_gd_write_sgl(struct gdma_queue *wq, u8 *wqe_ptr, + const struct gdma_wqe_request *wqe_req) +{ + u32 sgl_size = sizeof(struct gdma_sge) * wqe_req->num_sge; + const u8 *address = (u8 *)wqe_req->sgl; + u8 *base_ptr, *end_ptr; + u32 size_to_end; + + base_ptr = wq->queue_mem_ptr; + end_ptr = base_ptr + wq->queue_size; + size_to_end = (u32)(end_ptr - wqe_ptr); + + if (size_to_end < sgl_size) { + memcpy(wqe_ptr, address, size_to_end); + + wqe_ptr = base_ptr; + address += size_to_end; + sgl_size -= size_to_end; + } + + memcpy(wqe_ptr, address, sgl_size); +} + +int mana_gd_post_work_request(struct gdma_queue *wq, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info) +{ + u32 client_oob_size = wqe_req->inline_oob_size; + struct gdma_context *gc; + u32 sgl_data_size; + u32 max_wqe_size; + u32 wqe_size; + u8 *wqe_ptr; + + if (wqe_req->num_sge == 0) + return -EINVAL; + + if (wq->type == GDMA_RQ) { + if (client_oob_size != 0) + return -EINVAL; + + client_oob_size = INLINE_OOB_SMALL_SIZE; + + max_wqe_size = GDMA_MAX_RQE_SIZE; + } else { + if (client_oob_size != INLINE_OOB_SMALL_SIZE && + client_oob_size != INLINE_OOB_LARGE_SIZE) + return -EINVAL; + + max_wqe_size = GDMA_MAX_SQE_SIZE; + } + + sgl_data_size = sizeof(struct gdma_sge) * wqe_req->num_sge; + wqe_size = ALIGN(sizeof(struct gdma_wqe) + client_oob_size + + sgl_data_size, GDMA_WQE_BU_SIZE); + if (wqe_size > max_wqe_size) + return -EINVAL; + + if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq)) { + gc = wq->gdma_dev->gdma_context; + dev_err(gc->dev, "unsuccessful flow control!\n"); + return -ENOSPC; + } + + if (wqe_info) + wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE; + + wqe_ptr = mana_gd_get_wqe_ptr(wq, wq->head); + wqe_ptr += mana_gd_write_client_oob(wqe_req, wq->type, client_oob_size, + sgl_data_size, wqe_ptr); + if (wqe_ptr >= (u8 *)wq->queue_mem_ptr + wq->queue_size) + wqe_ptr -= wq->queue_size; + + mana_gd_write_sgl(wq, wqe_ptr, wqe_req); + + wq->head += wqe_size / GDMA_WQE_BU_SIZE; + + return 0; +} + +int mana_gd_post_and_ring(struct gdma_queue *queue, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info) +{ + struct gdma_context *gc = queue->gdma_dev->gdma_context; + int err; + + err = mana_gd_post_work_request(queue, wqe_req, wqe_info); + if (err) + return err; + + mana_gd_wq_ring_doorbell(gc, queue); + + return 0; +} + +static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp) +{ + unsigned int num_cqe = cq->queue_size / sizeof(struct gdma_cqe); + struct gdma_cqe *cq_cqe = cq->queue_mem_ptr; + u32 owner_bits, new_bits, old_bits; + struct gdma_cqe *cqe; + + cqe = &cq_cqe[cq->head % num_cqe]; + owner_bits = cqe->cqe_info.owner_bits; + + old_bits = (cq->head / num_cqe - 1) & GDMA_CQE_OWNER_MASK; + /* Return 0 if no more entries. */ + if (owner_bits == old_bits) + return 0; + + new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK; + /* Return -1 if overflow detected. */ + if (owner_bits != new_bits) + return -1; + + comp->wq_num = cqe->cqe_info.wq_num; + comp->is_sq = cqe->cqe_info.is_sq; + memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE); + + return 1; +} + +int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe) +{ + int cqe_idx; + int ret; + + for (cqe_idx = 0; cqe_idx < num_cqe; cqe_idx++) { + ret = mana_gd_read_cqe(cq, &comp[cqe_idx]); + + if (ret < 0) { + cq->head -= cqe_idx; + return ret; + } + + if (ret == 0) + break; + + cq->head++; + } + + return cqe_idx; +} + +static irqreturn_t mana_gd_intr(int irq, void *arg) +{ + struct gdma_irq_context *gic = arg; + + if (gic->handler) + gic->handler(gic->arg); + + return IRQ_HANDLED; +} + +int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r) +{ + r->map = bitmap_zalloc(res_avail, GFP_KERNEL); + if (!r->map) + return -ENOMEM; + + r->size = res_avail; + spin_lock_init(&r->lock); + + return 0; +} + +void mana_gd_free_res_map(struct gdma_resource *r) +{ + bitmap_free(r->map); + r->map = NULL; + r->size = 0; +} + +static int mana_gd_setup_irqs(struct pci_dev *pdev) +{ + unsigned int max_queues_per_port = num_online_cpus(); + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; + unsigned int max_irqs; + int nvec, irq; + int err, i, j; + + if (max_queues_per_port > MANA_MAX_NUM_QUEUES) + max_queues_per_port = MANA_MAX_NUM_QUEUES; + + max_irqs = max_queues_per_port * MAX_PORTS_IN_MANA_DEV; + + /* Need 1 interrupt for the Hardware communication Channel (HWC) */ + max_irqs++; + + nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX); + if (nvec < 0) + return nvec; + + gc->irq_contexts = kcalloc(nvec, sizeof(struct gdma_irq_context), + GFP_KERNEL); + if (!gc->irq_contexts) { + err = -ENOMEM; + goto free_irq_vector; + } + + for (i = 0; i < nvec; i++) { + gic = &gc->irq_contexts[i]; + gic->handler = NULL; + gic->arg = NULL; + + irq = pci_irq_vector(pdev, i); + if (irq < 0) { + err = irq; + goto free_irq; + } + + err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic); + if (err) + goto free_irq; + } + + err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); + if (err) + goto free_irq; + + gc->max_num_msix = nvec; + gc->num_msix_usable = nvec; + + return 0; + +free_irq: + for (j = i - 1; j >= 0; j--) { + irq = pci_irq_vector(pdev, j); + gic = &gc->irq_contexts[j]; + free_irq(irq, gic); + } + + kfree(gc->irq_contexts); + gc->irq_contexts = NULL; +free_irq_vector: + pci_free_irq_vectors(pdev); + return err; +} + +static void mana_gd_remove_irqs(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; + int irq, i; + + if (gc->max_num_msix < 1) + return; + + mana_gd_free_res_map(&gc->msix_resource); + + for (i = 0; i < gc->max_num_msix; i++) { + irq = pci_irq_vector(pdev, i); + if (irq < 0) + continue; + + gic = &gc->irq_contexts[i]; + free_irq(irq, gic); + } + + pci_free_irq_vectors(pdev); + + gc->max_num_msix = 0; + gc->num_msix_usable = 0; + kfree(gc->irq_contexts); + gc->irq_contexts = NULL; +} + +static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct gdma_context *gc; + void __iomem *bar0_va; + int bar = 0; + int err; + + err = pci_enable_device(pdev); + if (err) + return -ENXIO; + + pci_set_master(pdev); + + err = pci_request_regions(pdev, "mana"); + if (err) + goto disable_dev; + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) + goto release_region; + + err = -ENOMEM; + gc = vzalloc(sizeof(*gc)); + if (!gc) + goto release_region; + + bar0_va = pci_iomap(pdev, bar, 0); + if (!bar0_va) + goto free_gc; + + gc->bar0_va = bar0_va; + gc->dev = &pdev->dev; + + pci_set_drvdata(pdev, gc); + + mana_gd_init_registers(pdev); + + mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); + + err = mana_gd_setup_irqs(pdev); + if (err) + goto unmap_bar; + + mutex_init(&gc->eq_test_event_mutex); + + err = mana_hwc_create_channel(gc); + if (err) + goto remove_irq; + + err = mana_gd_verify_vf_version(pdev); + if (err) + goto remove_irq; + + err = mana_gd_query_max_resources(pdev); + if (err) + goto remove_irq; + + err = mana_gd_detect_devices(pdev); + if (err) + goto remove_irq; + + err = mana_probe(&gc->mana); + if (err) + goto clean_up_gdma; + + return 0; + +clean_up_gdma: + mana_hwc_destroy_channel(gc); + vfree(gc->cq_table); + gc->cq_table = NULL; +remove_irq: + mana_gd_remove_irqs(pdev); +unmap_bar: + pci_iounmap(pdev, bar0_va); +free_gc: + vfree(gc); +release_region: + pci_release_regions(pdev); +disable_dev: + pci_clear_master(pdev); + pci_disable_device(pdev); + dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err); + return err; +} + +static void mana_gd_remove(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + mana_remove(&gc->mana); + + mana_hwc_destroy_channel(gc); + vfree(gc->cq_table); + gc->cq_table = NULL; + + mana_gd_remove_irqs(pdev); + + pci_iounmap(pdev, gc->bar0_va); + + vfree(gc); + + pci_release_regions(pdev); + pci_clear_master(pdev); + pci_disable_device(pdev); +} + +#ifndef PCI_VENDOR_ID_MICROSOFT +#define PCI_VENDOR_ID_MICROSOFT 0x1414 +#endif + +static const struct pci_device_id mana_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, 0x00BA) }, + { } +}; + +static struct pci_driver mana_driver = { + .name = "mana", + .id_table = mana_id_table, + .probe = mana_gd_probe, + .remove = mana_gd_remove, +}; + +module_pci_driver(mana_driver); + +MODULE_DEVICE_TABLE(pci, mana_id_table); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Microsoft Azure Network Adapter driver"); diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c new file mode 100644 index 000000000000..462bc577692a --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include "gdma.h" +#include "hw_channel.h" + +static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) +{ + struct gdma_resource *r = &hwc->inflight_msg_res; + unsigned long flags; + u32 index; + + down(&hwc->sema); + + spin_lock_irqsave(&r->lock, flags); + + index = find_first_zero_bit(hwc->inflight_msg_res.map, + hwc->inflight_msg_res.size); + + bitmap_set(hwc->inflight_msg_res.map, index, 1); + + spin_unlock_irqrestore(&r->lock, flags); + + *msg_id = index; + + return 0; +} + +static void mana_hwc_put_msg_index(struct hw_channel_context *hwc, u16 msg_id) +{ + struct gdma_resource *r = &hwc->inflight_msg_res; + unsigned long flags; + + spin_lock_irqsave(&r->lock, flags); + bitmap_clear(hwc->inflight_msg_res.map, msg_id, 1); + spin_unlock_irqrestore(&r->lock, flags); + + up(&hwc->sema); +} + +static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, + const struct gdma_resp_hdr *resp_msg, + u32 resp_len) +{ + if (resp_len < sizeof(*resp_msg)) + return -EPROTO; + + if (resp_len > caller_ctx->output_buflen) + return -EPROTO; + + return 0; +} + +static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, + const struct gdma_resp_hdr *resp_msg) +{ + struct hwc_caller_ctx *ctx; + int err = -EPROTO; + + if (!test_bit(resp_msg->response.hwc_msg_id, + hwc->inflight_msg_res.map)) { + dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", + resp_msg->response.hwc_msg_id); + return; + } + + ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id; + err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len); + if (err) + goto out; + + ctx->status_code = resp_msg->status; + + memcpy(ctx->output_buf, resp_msg, resp_len); +out: + ctx->error = err; + complete(&ctx->comp_event); +} + +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, + struct hwc_work_request *req) +{ + struct device *dev = hwc_rxq->hwc->dev; + struct gdma_sge *sge; + int err; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; + sge->size = req->buf_len; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); + return err; +} + +static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, + struct gdma_event *event) +{ + struct hw_channel_context *hwc = ctx; + struct gdma_dev *gd = hwc->gdma_dev; + union hwc_init_type_data type_data; + union hwc_init_eq_id_db eq_db; + u32 type, val; + + switch (event->type) { + case GDMA_EQE_HWC_INIT_EQ_ID_DB: + eq_db.as_uint32 = event->details[0]; + hwc->cq->gdma_eq->id = eq_db.eq_id; + gd->doorbell = eq_db.doorbell; + break; + + case GDMA_EQE_HWC_INIT_DATA: + type_data.as_uint32 = event->details[0]; + type = type_data.type; + val = type_data.value; + + switch (type) { + case HWC_INIT_DATA_CQID: + hwc->cq->gdma_cq->id = val; + break; + + case HWC_INIT_DATA_RQID: + hwc->rxq->gdma_wq->id = val; + break; + + case HWC_INIT_DATA_SQID: + hwc->txq->gdma_wq->id = val; + break; + + case HWC_INIT_DATA_QUEUE_DEPTH: + hwc->hwc_init_q_depth_max = (u16)val; + break; + + case HWC_INIT_DATA_MAX_REQUEST: + hwc->hwc_init_max_req_msg_size = val; + break; + + case HWC_INIT_DATA_MAX_RESPONSE: + hwc->hwc_init_max_resp_msg_size = val; + break; + + case HWC_INIT_DATA_MAX_NUM_CQS: + gd->gdma_context->max_num_cqs = val; + break; + + case HWC_INIT_DATA_PDID: + hwc->gdma_dev->pdid = val; + break; + + case HWC_INIT_DATA_GPA_MKEY: + hwc->rxq->msg_buf->gpa_mkey = val; + hwc->txq->msg_buf->gpa_mkey = val; + break; + } + + break; + + case GDMA_EQE_HWC_INIT_DONE: + complete(&hwc->hwc_init_eqe_comp); + break; + + default: + /* Ignore unknown events, which should never happen. */ + break; + } +} + +static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, + const struct hwc_rx_oob *rx_oob) +{ + struct hw_channel_context *hwc = ctx; + struct hwc_wq *hwc_rxq = hwc->rxq; + struct hwc_work_request *rx_req; + struct gdma_resp_hdr *resp; + struct gdma_wqe *dma_oob; + struct gdma_queue *rq; + struct gdma_sge *sge; + u64 rq_base_addr; + u64 rx_req_idx; + u8 *wqe; + + if (WARN_ON_ONCE(hwc_rxq->gdma_wq->id != gdma_rxq_id)) + return; + + rq = hwc_rxq->gdma_wq; + wqe = mana_gd_get_wqe_ptr(rq, rx_oob->wqe_offset / GDMA_WQE_BU_SIZE); + dma_oob = (struct gdma_wqe *)wqe; + + sge = (struct gdma_sge *)(wqe + 8 + dma_oob->inline_oob_size_div4 * 4); + + /* Select the RX work request for virtual address and for reposting. */ + rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle; + rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size; + + rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx]; + resp = (struct gdma_resp_hdr *)rx_req->buf_va; + + if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) { + dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n", + resp->response.hwc_msg_id); + return; + } + + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); + + /* Do no longer use 'resp', because the buffer is posted to the HW + * in the below mana_hwc_post_rx_wqe(). + */ + resp = NULL; + + mana_hwc_post_rx_wqe(hwc_rxq, rx_req); +} + +static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id, + const struct hwc_rx_oob *rx_oob) +{ + struct hw_channel_context *hwc = ctx; + struct hwc_wq *hwc_txq = hwc->txq; + + WARN_ON_ONCE(!hwc_txq || hwc_txq->gdma_wq->id != gdma_txq_id); +} + +static int mana_hwc_create_gdma_wq(struct hw_channel_context *hwc, + enum gdma_queue_type type, u64 queue_size, + struct gdma_queue **queue) +{ + struct gdma_queue_spec spec = {}; + + if (type != GDMA_SQ && type != GDMA_RQ) + return -EINVAL; + + spec.type = type; + spec.monitor_avl_buf = false; + spec.queue_size = queue_size; + + return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); +} + +static int mana_hwc_create_gdma_cq(struct hw_channel_context *hwc, + u64 queue_size, + void *ctx, gdma_cq_callback *cb, + struct gdma_queue *parent_eq, + struct gdma_queue **queue) +{ + struct gdma_queue_spec spec = {}; + + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = queue_size; + spec.cq.context = ctx; + spec.cq.callback = cb; + spec.cq.parent_eq = parent_eq; + + return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); +} + +static int mana_hwc_create_gdma_eq(struct hw_channel_context *hwc, + u64 queue_size, + void *ctx, gdma_eq_callback *cb, + struct gdma_queue **queue) +{ + struct gdma_queue_spec spec = {}; + + spec.type = GDMA_EQ; + spec.monitor_avl_buf = false; + spec.queue_size = queue_size; + spec.eq.context = ctx; + spec.eq.callback = cb; + spec.eq.log2_throttle_limit = DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ; + + return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); +} + +static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self) +{ + struct hwc_rx_oob comp_data = {}; + struct gdma_comp *completions; + struct hwc_cq *hwc_cq = ctx; + u32 comp_read, i; + + WARN_ON_ONCE(hwc_cq->gdma_cq != q_self); + + completions = hwc_cq->comp_buf; + comp_read = mana_gd_poll_cq(q_self, completions, hwc_cq->queue_depth); + WARN_ON_ONCE(comp_read <= 0 || comp_read > hwc_cq->queue_depth); + + for (i = 0; i < comp_read; ++i) { + comp_data = *(struct hwc_rx_oob *)completions[i].cqe_data; + + if (completions[i].is_sq) + hwc_cq->tx_event_handler(hwc_cq->tx_event_ctx, + completions[i].wq_num, + &comp_data); + else + hwc_cq->rx_event_handler(hwc_cq->rx_event_ctx, + completions[i].wq_num, + &comp_data); + } + + mana_gd_arm_cq(q_self); +} + +static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq) +{ + if (!hwc_cq) + return; + + kfree(hwc_cq->comp_buf); + + if (hwc_cq->gdma_cq) + mana_gd_destroy_queue(gc, hwc_cq->gdma_cq); + + if (hwc_cq->gdma_eq) + mana_gd_destroy_queue(gc, hwc_cq->gdma_eq); + + kfree(hwc_cq); +} + +static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth, + gdma_eq_callback *callback, void *ctx, + hwc_rx_event_handler_t *rx_ev_hdlr, + void *rx_ev_ctx, + hwc_tx_event_handler_t *tx_ev_hdlr, + void *tx_ev_ctx, struct hwc_cq **hwc_cq_ptr) +{ + struct gdma_queue *eq, *cq; + struct gdma_comp *comp_buf; + struct hwc_cq *hwc_cq; + u32 eq_size, cq_size; + int err; + + eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); + if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) + eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + + cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); + if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) + cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + + hwc_cq = kzalloc(sizeof(*hwc_cq), GFP_KERNEL); + if (!hwc_cq) + return -ENOMEM; + + err = mana_hwc_create_gdma_eq(hwc, eq_size, ctx, callback, &eq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC EQ for RQ: %d\n", err); + goto out; + } + hwc_cq->gdma_eq = eq; + + err = mana_hwc_create_gdma_cq(hwc, cq_size, hwc_cq, mana_hwc_comp_event, + eq, &cq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC CQ for RQ: %d\n", err); + goto out; + } + hwc_cq->gdma_cq = cq; + + comp_buf = kcalloc(q_depth, sizeof(struct gdma_comp), GFP_KERNEL); + if (!comp_buf) { + err = -ENOMEM; + goto out; + } + + hwc_cq->hwc = hwc; + hwc_cq->comp_buf = comp_buf; + hwc_cq->queue_depth = q_depth; + hwc_cq->rx_event_handler = rx_ev_hdlr; + hwc_cq->rx_event_ctx = rx_ev_ctx; + hwc_cq->tx_event_handler = tx_ev_hdlr; + hwc_cq->tx_event_ctx = tx_ev_ctx; + + *hwc_cq_ptr = hwc_cq; + return 0; +out: + mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq); + return err; +} + +static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, + u32 max_msg_size, + struct hwc_dma_buf **dma_buf_ptr) +{ + struct gdma_context *gc = hwc->gdma_dev->gdma_context; + struct hwc_work_request *hwc_wr; + struct hwc_dma_buf *dma_buf; + struct gdma_mem_info *gmi; + void *virt_addr; + u32 buf_size; + u8 *base_pa; + int err; + u16 i; + + dma_buf = kzalloc(sizeof(*dma_buf) + + q_depth * sizeof(struct hwc_work_request), + GFP_KERNEL); + if (!dma_buf) + return -ENOMEM; + + dma_buf->num_reqs = q_depth; + + buf_size = PAGE_ALIGN(q_depth * max_msg_size); + + gmi = &dma_buf->mem_info; + err = mana_gd_alloc_memory(gc, buf_size, gmi); + if (err) { + dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); + goto out; + } + + virt_addr = dma_buf->mem_info.virt_addr; + base_pa = (u8 *)dma_buf->mem_info.dma_handle; + + for (i = 0; i < q_depth; i++) { + hwc_wr = &dma_buf->reqs[i]; + + hwc_wr->buf_va = virt_addr + i * max_msg_size; + hwc_wr->buf_sge_addr = base_pa + i * max_msg_size; + + hwc_wr->buf_len = max_msg_size; + } + + *dma_buf_ptr = dma_buf; + return 0; +out: + kfree(dma_buf); + return err; +} + +static void mana_hwc_dealloc_dma_buf(struct hw_channel_context *hwc, + struct hwc_dma_buf *dma_buf) +{ + if (!dma_buf) + return; + + mana_gd_free_memory(&dma_buf->mem_info); + + kfree(dma_buf); +} + +static void mana_hwc_destroy_wq(struct hw_channel_context *hwc, + struct hwc_wq *hwc_wq) +{ + if (!hwc_wq) + return; + + mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf); + + if (hwc_wq->gdma_wq) + mana_gd_destroy_queue(hwc->gdma_dev->gdma_context, + hwc_wq->gdma_wq); + + kfree(hwc_wq); +} + +static int mana_hwc_create_wq(struct hw_channel_context *hwc, + enum gdma_queue_type q_type, u16 q_depth, + u32 max_msg_size, struct hwc_cq *hwc_cq, + struct hwc_wq **hwc_wq_ptr) +{ + struct gdma_queue *queue; + struct hwc_wq *hwc_wq; + u32 queue_size; + int err; + + WARN_ON(q_type != GDMA_SQ && q_type != GDMA_RQ); + + if (q_type == GDMA_RQ) + queue_size = roundup_pow_of_two(GDMA_MAX_RQE_SIZE * q_depth); + else + queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); + + if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) + queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; + + hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL); + if (!hwc_wq) + return -ENOMEM; + + err = mana_hwc_create_gdma_wq(hwc, q_type, queue_size, &queue); + if (err) + goto out; + + err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size, + &hwc_wq->msg_buf); + if (err) + goto out; + + hwc_wq->hwc = hwc; + hwc_wq->gdma_wq = queue; + hwc_wq->queue_depth = q_depth; + hwc_wq->hwc_cq = hwc_cq; + + *hwc_wq_ptr = hwc_wq; + return 0; +out: + if (err) + mana_hwc_destroy_wq(hwc, hwc_wq); + return err; +} + +static int mana_hwc_post_tx_wqe(const struct hwc_wq *hwc_txq, + struct hwc_work_request *req, + u32 dest_virt_rq_id, u32 dest_virt_rcq_id, + bool dest_pf) +{ + struct device *dev = hwc_txq->hwc->dev; + struct hwc_tx_oob *tx_oob; + struct gdma_sge *sge; + int err; + + if (req->msg_size == 0 || req->msg_size > req->buf_len) { + dev_err(dev, "wrong msg_size: %u, buf_len: %u\n", + req->msg_size, req->buf_len); + return -EINVAL; + } + + tx_oob = &req->tx_oob; + + tx_oob->vrq_id = dest_virt_rq_id; + tx_oob->dest_vfid = 0; + tx_oob->vrcq_id = dest_virt_rcq_id; + tx_oob->vscq_id = hwc_txq->hwc_cq->gdma_cq->id; + tx_oob->loopback = false; + tx_oob->lso_override = false; + tx_oob->dest_pf = dest_pf; + tx_oob->vsq_id = hwc_txq->gdma_wq->id; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_txq->msg_buf->gpa_mkey; + sge->size = req->msg_size; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.inline_oob_size = sizeof(struct hwc_tx_oob); + req->wqe_req.inline_oob_data = tx_oob; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_txq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC SQ: %d\n", err); + return err; +} + +static int mana_hwc_init_inflight_msg(struct hw_channel_context *hwc, + u16 num_msg) +{ + int err; + + sema_init(&hwc->sema, num_msg); + + err = mana_gd_alloc_res_map(num_msg, &hwc->inflight_msg_res); + if (err) + dev_err(hwc->dev, "Failed to init inflight_msg_res: %d\n", err); + return err; +} + +static int mana_hwc_test_channel(struct hw_channel_context *hwc, u16 q_depth, + u32 max_req_msg_size, u32 max_resp_msg_size) +{ + struct gdma_context *gc = hwc->gdma_dev->gdma_context; + struct hwc_wq *hwc_rxq = hwc->rxq; + struct hwc_work_request *req; + struct hwc_caller_ctx *ctx; + int err; + int i; + + /* Post all WQEs on the RQ */ + for (i = 0; i < q_depth; i++) { + req = &hwc_rxq->msg_buf->reqs[i]; + err = mana_hwc_post_rx_wqe(hwc_rxq, req); + if (err) + return err; + } + + ctx = kzalloc(q_depth * sizeof(struct hwc_caller_ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + for (i = 0; i < q_depth; ++i) + init_completion(&ctx[i].comp_event); + + hwc->caller_ctx = ctx; + + return mana_gd_test_eq(gc, hwc->cq->gdma_eq); +} + +static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth, + u32 *max_req_msg_size, + u32 *max_resp_msg_size) +{ + struct hw_channel_context *hwc = gc->hwc.driver_data; + struct gdma_queue *rq = hwc->rxq->gdma_wq; + struct gdma_queue *sq = hwc->txq->gdma_wq; + struct gdma_queue *eq = hwc->cq->gdma_eq; + struct gdma_queue *cq = hwc->cq->gdma_cq; + int err; + + init_completion(&hwc->hwc_init_eqe_comp); + + err = mana_smc_setup_hwc(&gc->shm_channel, false, + eq->mem_info.dma_handle, + cq->mem_info.dma_handle, + rq->mem_info.dma_handle, + sq->mem_info.dma_handle, + eq->eq.msix_index); + if (err) + return err; + + if (!wait_for_completion_timeout(&hwc->hwc_init_eqe_comp, 60 * HZ)) + return -ETIMEDOUT; + + *q_depth = hwc->hwc_init_q_depth_max; + *max_req_msg_size = hwc->hwc_init_max_req_msg_size; + *max_resp_msg_size = hwc->hwc_init_max_resp_msg_size; + + if (WARN_ON(cq->id >= gc->max_num_cqs)) + return -EPROTO; + + gc->cq_table = vzalloc(gc->max_num_cqs * sizeof(struct gdma_queue *)); + if (!gc->cq_table) + return -ENOMEM; + + gc->cq_table[cq->id] = cq; + + return 0; +} + +static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16 q_depth, + u32 max_req_msg_size, u32 max_resp_msg_size) +{ + struct hwc_wq *hwc_rxq = NULL; + struct hwc_wq *hwc_txq = NULL; + struct hwc_cq *hwc_cq = NULL; + int err; + + err = mana_hwc_init_inflight_msg(hwc, q_depth); + if (err) + return err; + + /* CQ is shared by SQ and RQ, so CQ's queue depth is the sum of SQ + * queue depth and RQ queue depth. + */ + err = mana_hwc_create_cq(hwc, q_depth * 2, + mana_hwc_init_event_handler, hwc, + mana_hwc_rx_event_handler, hwc, + mana_hwc_tx_event_handler, hwc, &hwc_cq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC CQ: %d\n", err); + goto out; + } + hwc->cq = hwc_cq; + + err = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size, + hwc_cq, &hwc_rxq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC RQ: %d\n", err); + goto out; + } + hwc->rxq = hwc_rxq; + + err = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size, + hwc_cq, &hwc_txq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC SQ: %d\n", err); + goto out; + } + hwc->txq = hwc_txq; + + hwc->num_inflight_msg = q_depth; + hwc->max_req_msg_size = max_req_msg_size; + + return 0; +out: + if (hwc_txq) + mana_hwc_destroy_wq(hwc, hwc_txq); + + if (hwc_rxq) + mana_hwc_destroy_wq(hwc, hwc_rxq); + + if (hwc_cq) + mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq); + + mana_gd_free_res_map(&hwc->inflight_msg_res); + return err; +} + +int mana_hwc_create_channel(struct gdma_context *gc) +{ + u32 max_req_msg_size, max_resp_msg_size; + struct gdma_dev *gd = &gc->hwc; + struct hw_channel_context *hwc; + u16 q_depth_max; + int err; + + hwc = kzalloc(sizeof(*hwc), GFP_KERNEL); + if (!hwc) + return -ENOMEM; + + gd->gdma_context = gc; + gd->driver_data = hwc; + hwc->gdma_dev = gd; + hwc->dev = gc->dev; + + /* HWC's instance number is always 0. */ + gd->dev_id.as_uint32 = 0; + gd->dev_id.type = GDMA_DEVICE_HWC; + + gd->pdid = INVALID_PDID; + gd->doorbell = INVALID_DOORBELL; + + err = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, + HW_CHANNEL_MAX_REQUEST_SIZE, + HW_CHANNEL_MAX_RESPONSE_SIZE); + if (err) { + dev_err(hwc->dev, "Failed to initialize HWC: %d\n", err); + goto out; + } + + err = mana_hwc_establish_channel(gc, &q_depth_max, &max_req_msg_size, + &max_resp_msg_size); + if (err) { + dev_err(hwc->dev, "Failed to establish HWC: %d\n", err); + goto out; + } + + err = mana_hwc_test_channel(gc->hwc.driver_data, + HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, + max_req_msg_size, max_resp_msg_size); + if (err) { + dev_err(hwc->dev, "Failed to test HWC: %d\n", err); + goto out; + } + + return 0; +out: + kfree(hwc); + return err; +} + +void mana_hwc_destroy_channel(struct gdma_context *gc) +{ + struct hw_channel_context *hwc = gc->hwc.driver_data; + struct hwc_caller_ctx *ctx; + + mana_smc_teardown_hwc(&gc->shm_channel, false); + + ctx = hwc->caller_ctx; + kfree(ctx); + hwc->caller_ctx = NULL; + + mana_hwc_destroy_wq(hwc, hwc->txq); + hwc->txq = NULL; + + mana_hwc_destroy_wq(hwc, hwc->rxq); + hwc->rxq = NULL; + + mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq); + hwc->cq = NULL; + + mana_gd_free_res_map(&hwc->inflight_msg_res); + + hwc->num_inflight_msg = 0; + + if (hwc->gdma_dev->pdid != INVALID_PDID) { + hwc->gdma_dev->doorbell = INVALID_DOORBELL; + hwc->gdma_dev->pdid = INVALID_PDID; + } + + kfree(hwc); + gc->hwc.driver_data = NULL; + gc->hwc.gdma_context = NULL; +} + +int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + const void *req, u32 resp_len, void *resp) +{ + struct hwc_work_request *tx_wr; + struct hwc_wq *txq = hwc->txq; + struct gdma_req_hdr *req_msg; + struct hwc_caller_ctx *ctx; + u16 msg_id; + int err; + + mana_hwc_get_msg_index(hwc, &msg_id); + + tx_wr = &txq->msg_buf->reqs[msg_id]; + + if (req_len > tx_wr->buf_len) { + dev_err(hwc->dev, "HWC: req msg size: %d > %d\n", req_len, + tx_wr->buf_len); + err = -EINVAL; + goto out; + } + + ctx = hwc->caller_ctx + msg_id; + ctx->output_buf = resp; + ctx->output_buflen = resp_len; + + req_msg = (struct gdma_req_hdr *)tx_wr->buf_va; + if (req) + memcpy(req_msg, req, req_len); + + req_msg->req.hwc_msg_id = msg_id; + + tx_wr->msg_size = req_len; + + err = mana_hwc_post_tx_wqe(txq, tx_wr, 0, 0, false); + if (err) { + dev_err(hwc->dev, "HWC: Failed to post send WQE: %d\n", err); + goto out; + } + + if (!wait_for_completion_timeout(&ctx->comp_event, 30 * HZ)) { + dev_err(hwc->dev, "HWC: Request timed out!\n"); + err = -ETIMEDOUT; + goto out; + } + + if (ctx->error) { + err = ctx->error; + goto out; + } + + if (ctx->status_code) { + dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", + ctx->status_code); + err = -EPROTO; + goto out; + } +out: + mana_hwc_put_msg_index(hwc, msg_id); + return err; +} diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.h b/drivers/net/ethernet/microsoft/mana/hw_channel.h new file mode 100644 index 000000000000..31c6e83c454a --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _HW_CHANNEL_H +#define _HW_CHANNEL_H + +#define DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ 4 + +#define HW_CHANNEL_MAX_REQUEST_SIZE 0x1000 +#define HW_CHANNEL_MAX_RESPONSE_SIZE 0x1000 + +#define HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH 1 + +#define HWC_INIT_DATA_CQID 1 +#define HWC_INIT_DATA_RQID 2 +#define HWC_INIT_DATA_SQID 3 +#define HWC_INIT_DATA_QUEUE_DEPTH 4 +#define HWC_INIT_DATA_MAX_REQUEST 5 +#define HWC_INIT_DATA_MAX_RESPONSE 6 +#define HWC_INIT_DATA_MAX_NUM_CQS 7 +#define HWC_INIT_DATA_PDID 8 +#define HWC_INIT_DATA_GPA_MKEY 9 + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +union hwc_init_eq_id_db { + u32 as_uint32; + + struct { + u32 eq_id : 16; + u32 doorbell : 16; + }; +}; /* HW DATA */ + +union hwc_init_type_data { + u32 as_uint32; + + struct { + u32 value : 24; + u32 type : 8; + }; +}; /* HW DATA */ + +struct hwc_rx_oob { + u32 type : 6; + u32 eom : 1; + u32 som : 1; + u32 vendor_err : 8; + u32 reserved1 : 16; + + u32 src_virt_wq : 24; + u32 src_vfid : 8; + + u32 reserved2; + + union { + u32 wqe_addr_low; + u32 wqe_offset; + }; + + u32 wqe_addr_high; + + u32 client_data_unit : 14; + u32 reserved3 : 18; + + u32 tx_oob_data_size; + + u32 chunk_offset : 21; + u32 reserved4 : 11; +}; /* HW DATA */ + +struct hwc_tx_oob { + u32 reserved1; + + u32 reserved2; + + u32 vrq_id : 24; + u32 dest_vfid : 8; + + u32 vrcq_id : 24; + u32 reserved3 : 8; + + u32 vscq_id : 24; + u32 loopback : 1; + u32 lso_override: 1; + u32 dest_pf : 1; + u32 reserved4 : 5; + + u32 vsq_id : 24; + u32 reserved5 : 8; +}; /* HW DATA */ + +struct hwc_work_request { + void *buf_va; + void *buf_sge_addr; + u32 buf_len; + u32 msg_size; + + struct gdma_wqe_request wqe_req; + struct hwc_tx_oob tx_oob; + + struct gdma_sge sge; +}; + +/* hwc_dma_buf represents the array of in-flight WQEs. + * mem_info as know as the GDMA mapped memory is partitioned and used by + * in-flight WQEs. + * The number of WQEs is determined by the number of in-flight messages. + */ +struct hwc_dma_buf { + struct gdma_mem_info mem_info; + + u32 gpa_mkey; + + u32 num_reqs; + struct hwc_work_request reqs[]; +}; + +typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id, + const struct hwc_rx_oob *rx_oob); + +typedef void hwc_tx_event_handler_t(void *ctx, u32 gdma_txq_id, + const struct hwc_rx_oob *rx_oob); + +struct hwc_cq { + struct hw_channel_context *hwc; + + struct gdma_queue *gdma_cq; + struct gdma_queue *gdma_eq; + struct gdma_comp *comp_buf; + u16 queue_depth; + + hwc_rx_event_handler_t *rx_event_handler; + void *rx_event_ctx; + + hwc_tx_event_handler_t *tx_event_handler; + void *tx_event_ctx; +}; + +struct hwc_wq { + struct hw_channel_context *hwc; + + struct gdma_queue *gdma_wq; + struct hwc_dma_buf *msg_buf; + u16 queue_depth; + + struct hwc_cq *hwc_cq; +}; + +struct hwc_caller_ctx { + struct completion comp_event; + void *output_buf; + u32 output_buflen; + + u32 error; /* Linux error code */ + u32 status_code; +}; + +struct hw_channel_context { + struct gdma_dev *gdma_dev; + struct device *dev; + + u16 num_inflight_msg; + u32 max_req_msg_size; + + u16 hwc_init_q_depth_max; + u32 hwc_init_max_req_msg_size; + u32 hwc_init_max_resp_msg_size; + + struct completion hwc_init_eqe_comp; + + struct hwc_wq *rxq; + struct hwc_wq *txq; + struct hwc_cq *cq; + + struct semaphore sema; + struct gdma_resource inflight_msg_res; + + struct hwc_caller_ctx *caller_ctx; +}; + +int mana_hwc_create_channel(struct gdma_context *gc); +void mana_hwc_destroy_channel(struct gdma_context *gc); + +int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + const void *req, u32 resp_len, void *resp); + +#endif /* _HW_CHANNEL_H */ diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h new file mode 100644 index 000000000000..a2c3f826f022 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana.h @@ -0,0 +1,533 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _MANA_H +#define _MANA_H + +#include "gdma.h" +#include "hw_channel.h" + +/* Microsoft Azure Network Adapter (MANA)'s definitions + * + * Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +/* MANA protocol version */ +#define MANA_MAJOR_VERSION 0 +#define MANA_MINOR_VERSION 1 +#define MANA_MICRO_VERSION 1 + +typedef u64 mana_handle_t; +#define INVALID_MANA_HANDLE ((mana_handle_t)-1) + +enum TRI_STATE { + TRI_STATE_UNKNOWN = -1, + TRI_STATE_FALSE = 0, + TRI_STATE_TRUE = 1 +}; + +/* Number of entries for hardware indirection table must be in power of 2 */ +#define MANA_INDIRECT_TABLE_SIZE 64 +#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1) + +/* The Toeplitz hash key's length in bytes: should be multiple of 8 */ +#define MANA_HASH_KEY_SIZE 40 + +#define COMP_ENTRY_SIZE 64 + +#define ADAPTER_MTU_SIZE 1500 +#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14) + +#define RX_BUFFERS_PER_QUEUE 512 + +#define MAX_SEND_BUFFERS_PER_QUEUE 256 + +#define EQ_SIZE (8 * PAGE_SIZE) +#define LOG2_EQ_THROTTLE 3 + +#define MAX_PORTS_IN_MANA_DEV 16 + +struct mana_stats { + u64 packets; + u64 bytes; + struct u64_stats_sync syncp; +}; + +struct mana_txq { + struct gdma_queue *gdma_sq; + + union { + u32 gdma_txq_id; + struct { + u32 reserved1 : 10; + u32 vsq_frame : 14; + u32 reserved2 : 8; + }; + }; + + u16 vp_offset; + + struct net_device *ndev; + + /* The SKBs are sent to the HW and we are waiting for the CQEs. */ + struct sk_buff_head pending_skbs; + struct netdev_queue *net_txq; + + atomic_t pending_sends; + + struct mana_stats stats; +}; + +/* skb data and frags dma mappings */ +struct mana_skb_head { + dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; + + u32 size[MAX_SKB_FRAGS + 1]; +}; + +#define MANA_HEADROOM sizeof(struct mana_skb_head) + +enum mana_tx_pkt_format { + MANA_SHORT_PKT_FMT = 0, + MANA_LONG_PKT_FMT = 1, +}; + +struct mana_tx_short_oob { + u32 pkt_fmt : 2; + u32 is_outer_ipv4 : 1; + u32 is_outer_ipv6 : 1; + u32 comp_iphdr_csum : 1; + u32 comp_tcp_csum : 1; + u32 comp_udp_csum : 1; + u32 supress_txcqe_gen : 1; + u32 vcq_num : 24; + + u32 trans_off : 10; /* Transport header offset */ + u32 vsq_frame : 14; + u32 short_vp_offset : 8; +}; /* HW DATA */ + +struct mana_tx_long_oob { + u32 is_encap : 1; + u32 inner_is_ipv6 : 1; + u32 inner_tcp_opt : 1; + u32 inject_vlan_pri_tag : 1; + u32 reserved1 : 12; + u32 pcp : 3; /* 802.1Q */ + u32 dei : 1; /* 802.1Q */ + u32 vlan_id : 12; /* 802.1Q */ + + u32 inner_frame_offset : 10; + u32 inner_ip_rel_offset : 6; + u32 long_vp_offset : 12; + u32 reserved2 : 4; + + u32 reserved3; + u32 reserved4; +}; /* HW DATA */ + +struct mana_tx_oob { + struct mana_tx_short_oob s_oob; + struct mana_tx_long_oob l_oob; +}; /* HW DATA */ + +enum mana_cq_type { + MANA_CQ_TYPE_RX, + MANA_CQ_TYPE_TX, +}; + +enum mana_cqe_type { + CQE_INVALID = 0, + CQE_RX_OKAY = 1, + CQE_RX_COALESCED_4 = 2, + CQE_RX_OBJECT_FENCE = 3, + CQE_RX_TRUNCATED = 4, + + CQE_TX_OKAY = 32, + CQE_TX_SA_DROP = 33, + CQE_TX_MTU_DROP = 34, + CQE_TX_INVALID_OOB = 35, + CQE_TX_INVALID_ETH_TYPE = 36, + CQE_TX_HDR_PROCESSING_ERROR = 37, + CQE_TX_VF_DISABLED = 38, + CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39, + CQE_TX_VPORT_DISABLED = 40, + CQE_TX_VLAN_TAGGING_VIOLATION = 41, +}; + +#define MANA_CQE_COMPLETION 1 + +struct mana_cqe_header { + u32 cqe_type : 6; + u32 client_type : 2; + u32 vendor_err : 24; +}; /* HW DATA */ + +/* NDIS HASH Types */ +#define NDIS_HASH_IPV4 BIT(0) +#define NDIS_HASH_TCP_IPV4 BIT(1) +#define NDIS_HASH_UDP_IPV4 BIT(2) +#define NDIS_HASH_IPV6 BIT(3) +#define NDIS_HASH_TCP_IPV6 BIT(4) +#define NDIS_HASH_UDP_IPV6 BIT(5) +#define NDIS_HASH_IPV6_EX BIT(6) +#define NDIS_HASH_TCP_IPV6_EX BIT(7) +#define NDIS_HASH_UDP_IPV6_EX BIT(8) + +#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX) +#define MANA_HASH_L4 \ + (NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \ + NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX) + +struct mana_rxcomp_perpkt_info { + u32 pkt_len : 16; + u32 reserved1 : 16; + u32 reserved2; + u32 pkt_hash; +}; /* HW DATA */ + +#define MANA_RXCOMP_OOB_NUM_PPI 4 + +/* Receive completion OOB */ +struct mana_rxcomp_oob { + struct mana_cqe_header cqe_hdr; + + u32 rx_vlan_id : 12; + u32 rx_vlantag_present : 1; + u32 rx_outer_iphdr_csum_succeed : 1; + u32 rx_outer_iphdr_csum_fail : 1; + u32 reserved1 : 1; + u32 rx_hashtype : 9; + u32 rx_iphdr_csum_succeed : 1; + u32 rx_iphdr_csum_fail : 1; + u32 rx_tcp_csum_succeed : 1; + u32 rx_tcp_csum_fail : 1; + u32 rx_udp_csum_succeed : 1; + u32 rx_udp_csum_fail : 1; + u32 reserved2 : 1; + + struct mana_rxcomp_perpkt_info ppi[MANA_RXCOMP_OOB_NUM_PPI]; + + u32 rx_wqe_offset; +}; /* HW DATA */ + +struct mana_tx_comp_oob { + struct mana_cqe_header cqe_hdr; + + u32 tx_data_offset; + + u32 tx_sgl_offset : 5; + u32 tx_wqe_offset : 27; + + u32 reserved[12]; +}; /* HW DATA */ + +struct mana_rxq; + +struct mana_cq { + struct gdma_queue *gdma_cq; + + /* Cache the CQ id (used to verify if each CQE comes to the right CQ. */ + u32 gdma_id; + + /* Type of the CQ: TX or RX */ + enum mana_cq_type type; + + /* Pointer to the mana_rxq that is pushing RX CQEs to the queue. + * Only and must be non-NULL if type is MANA_CQ_TYPE_RX. + */ + struct mana_rxq *rxq; + + /* Pointer to the mana_txq that is pushing TX CQEs to the queue. + * Only and must be non-NULL if type is MANA_CQ_TYPE_TX. + */ + struct mana_txq *txq; + + /* Pointer to a buffer which the CQ handler can copy the CQE's into. */ + struct gdma_comp *gdma_comp_buf; +}; + +#define GDMA_MAX_RQE_SGES 15 + +struct mana_recv_buf_oob { + /* A valid GDMA work request representing the data buffer. */ + struct gdma_wqe_request wqe_req; + + void *buf_va; + dma_addr_t buf_dma_addr; + + /* SGL of the buffer going to be sent has part of the work request. */ + u32 num_sge; + struct gdma_sge sgl[GDMA_MAX_RQE_SGES]; + + /* Required to store the result of mana_gd_post_work_request. + * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the + * work queue when the WQE is consumed. + */ + struct gdma_posted_wqe_info wqe_inf; +}; + +struct mana_rxq { + struct gdma_queue *gdma_rq; + /* Cache the gdma receive queue id */ + u32 gdma_id; + + /* Index of RQ in the vPort, not gdma receive queue id */ + u32 rxq_idx; + + u32 datasize; + + mana_handle_t rxobj; + + struct mana_cq rx_cq; + + struct net_device *ndev; + + /* Total number of receive buffers to be allocated */ + u32 num_rx_buf; + + u32 buf_index; + + struct mana_stats stats; + + /* MUST BE THE LAST MEMBER: + * Each receive buffer has an associated mana_recv_buf_oob. + */ + struct mana_recv_buf_oob rx_oobs[]; +}; + +struct mana_tx_qp { + struct mana_txq txq; + + struct mana_cq tx_cq; + + mana_handle_t tx_object; +}; + +struct mana_ethtool_stats { + u64 stop_queue; + u64 wake_queue; +}; + +struct mana_context { + struct gdma_dev *gdma_dev; + + u16 num_ports; + + struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; +}; + +struct mana_port_context { + struct mana_context *ac; + struct net_device *ndev; + + u8 mac_addr[ETH_ALEN]; + + struct mana_eq *eqs; + + enum TRI_STATE rss_state; + + mana_handle_t default_rxobj; + bool tx_shortform_allowed; + u16 tx_vp_offset; + + struct mana_tx_qp *tx_qp; + + /* Indirection Table for RX & TX. The values are queue indexes */ + u32 indir_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Indirection table containing RxObject Handles */ + mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Hash key used by the NIC */ + u8 hashkey[MANA_HASH_KEY_SIZE]; + + /* This points to an array of num_queues of RQ pointers. */ + struct mana_rxq **rxqs; + + /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */ + unsigned int max_queues; + unsigned int num_queues; + + mana_handle_t port_handle; + + u16 port_idx; + + bool port_is_up; + bool port_st_save; /* Saved port state */ + + struct mana_ethtool_stats eth_stats; +}; + +int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, + bool update_hash, bool update_tab); + +int mana_alloc_queues(struct net_device *ndev); +int mana_attach(struct net_device *ndev); +int mana_detach(struct net_device *ndev, bool from_close); + +int mana_probe(struct gdma_dev *gd); +void mana_remove(struct gdma_dev *gd); + +extern const struct ethtool_ops mana_ethtool_ops; + +struct mana_obj_spec { + u32 queue_index; + u64 gdma_region; + u32 queue_size; + u32 attached_eq; + u32 modr_ctx_id; +}; + +enum mana_command_code { + MANA_QUERY_DEV_CONFIG = 0x20001, + MANA_QUERY_GF_STAT = 0x20002, + MANA_CONFIG_VPORT_TX = 0x20003, + MANA_CREATE_WQ_OBJ = 0x20004, + MANA_DESTROY_WQ_OBJ = 0x20005, + MANA_FENCE_RQ = 0x20006, + MANA_CONFIG_VPORT_RX = 0x20007, + MANA_QUERY_VPORT_CONFIG = 0x20008, +}; + +/* Query Device Configuration */ +struct mana_query_device_cfg_req { + struct gdma_req_hdr hdr; + + /* Driver Capability flags */ + u64 drv_cap_flags1; + u64 drv_cap_flags2; + u64 drv_cap_flags3; + u64 drv_cap_flags4; + + u32 proto_major_ver; + u32 proto_minor_ver; + u32 proto_micro_ver; + + u32 reserved; +}; /* HW DATA */ + +struct mana_query_device_cfg_resp { + struct gdma_resp_hdr hdr; + + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + u64 pf_cap_flags4; + + u16 max_num_vports; + u16 reserved; + u32 max_num_eqs; +}; /* HW DATA */ + +/* Query vPort Configuration */ +struct mana_query_vport_cfg_req { + struct gdma_req_hdr hdr; + u32 vport_index; +}; /* HW DATA */ + +struct mana_query_vport_cfg_resp { + struct gdma_resp_hdr hdr; + u32 max_num_sq; + u32 max_num_rq; + u32 num_indirection_ent; + u32 reserved1; + u8 mac_addr[6]; + u8 reserved2[2]; + mana_handle_t vport; +}; /* HW DATA */ + +/* Configure vPort */ +struct mana_config_vport_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 pdid; + u32 doorbell_pageid; +}; /* HW DATA */ + +struct mana_config_vport_resp { + struct gdma_resp_hdr hdr; + u16 tx_vport_offset; + u8 short_form_allowed; + u8 reserved; +}; /* HW DATA */ + +/* Create WQ Object */ +struct mana_create_wqobj_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 wq_type; + u32 reserved; + u64 wq_gdma_region; + u64 cq_gdma_region; + u32 wq_size; + u32 cq_size; + u32 cq_moderation_ctx_id; + u32 cq_parent_qid; +}; /* HW DATA */ + +struct mana_create_wqobj_resp { + struct gdma_resp_hdr hdr; + u32 wq_id; + u32 cq_id; + mana_handle_t wq_obj; +}; /* HW DATA */ + +/* Destroy WQ Object */ +struct mana_destroy_wqobj_req { + struct gdma_req_hdr hdr; + u32 wq_type; + u32 reserved; + mana_handle_t wq_obj_handle; +}; /* HW DATA */ + +struct mana_destroy_wqobj_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Fence RQ */ +struct mana_fence_rq_req { + struct gdma_req_hdr hdr; + mana_handle_t wq_obj_handle; +}; /* HW DATA */ + +struct mana_fence_rq_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Configure vPort Rx Steering */ +struct mana_cfg_rx_steer_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u16 num_indir_entries; + u16 indir_tab_offset; + u32 rx_enable; + u32 rss_enable; + u8 update_default_rxobj; + u8 update_hashkey; + u8 update_indir_tab; + u8 reserved; + mana_handle_t default_rxobj; + u8 hashkey[MANA_HASH_KEY_SIZE]; +}; /* HW DATA */ + +struct mana_cfg_rx_steer_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +#define MANA_MAX_NUM_QUEUES 16 + +#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) + +struct mana_tx_package { + struct gdma_wqe_request wqe_req; + struct gdma_sge sgl_array[5]; + struct gdma_sge *sgl_ptr; + + struct mana_tx_oob tx_oob; + + struct gdma_posted_wqe_info wqe_info; +}; + +#endif /* _MANA_H */ diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c new file mode 100644 index 000000000000..a744ca0b6c19 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -0,0 +1,1895 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/mm.h> + +#include <net/checksum.h> +#include <net/ip6_checksum.h> + +#include "mana.h" + +/* Microsoft Azure Network Adapter (MANA) functions */ + +static int mana_open(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + err = mana_alloc_queues(ndev); + if (err) + return err; + + apc->port_is_up = true; + + /* Ensure port state updated before txq state */ + smp_wmb(); + + netif_carrier_on(ndev); + netif_tx_wake_all_queues(ndev); + + return 0; +} + +static int mana_close(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + if (!apc->port_is_up) + return 0; + + return mana_detach(ndev, true); +} + +static bool mana_can_tx(struct gdma_queue *wq) +{ + return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE; +} + +static unsigned int mana_checksum_info(struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *ip = ip_hdr(skb); + + if (ip->protocol == IPPROTO_TCP) + return IPPROTO_TCP; + + if (ip->protocol == IPPROTO_UDP) + return IPPROTO_UDP; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6 = ipv6_hdr(skb); + + if (ip6->nexthdr == IPPROTO_TCP) + return IPPROTO_TCP; + + if (ip6->nexthdr == IPPROTO_UDP) + return IPPROTO_UDP; + } + + /* No csum offloading */ + return 0; +} + +static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, + struct mana_tx_package *tp) +{ + struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; + struct gdma_dev *gd = apc->ac->gdma_dev; + struct gdma_context *gc; + struct device *dev; + skb_frag_t *frag; + dma_addr_t da; + int i; + + gc = gd->gdma_context; + dev = gc->dev; + da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); + + if (dma_mapping_error(dev, da)) + return -ENOMEM; + + ash->dma_handle[0] = da; + ash->size[0] = skb_headlen(skb); + + tp->wqe_req.sgl[0].address = ash->dma_handle[0]; + tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey; + tp->wqe_req.sgl[0].size = ash->size[0]; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + frag = &skb_shinfo(skb)->frags[i]; + da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), + DMA_TO_DEVICE); + + if (dma_mapping_error(dev, da)) + goto frag_err; + + ash->dma_handle[i + 1] = da; + ash->size[i + 1] = skb_frag_size(frag); + + tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1]; + tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey; + tp->wqe_req.sgl[i + 1].size = ash->size[i + 1]; + } + + return 0; + +frag_err: + for (i = i - 1; i >= 0; i--) + dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1], + DMA_TO_DEVICE); + + dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); + + return -ENOMEM; +} + +static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; + struct mana_port_context *apc = netdev_priv(ndev); + u16 txq_idx = skb_get_queue_mapping(skb); + struct gdma_dev *gd = apc->ac->gdma_dev; + bool ipv4 = false, ipv6 = false; + struct mana_tx_package pkg = {}; + struct netdev_queue *net_txq; + struct mana_stats *tx_stats; + struct gdma_queue *gdma_sq; + unsigned int csum_type; + struct mana_txq *txq; + struct mana_cq *cq; + int err, len; + + if (unlikely(!apc->port_is_up)) + goto tx_drop; + + if (skb_cow_head(skb, MANA_HEADROOM)) + goto tx_drop_count; + + txq = &apc->tx_qp[txq_idx].txq; + gdma_sq = txq->gdma_sq; + cq = &apc->tx_qp[txq_idx].tx_cq; + + pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; + pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame; + + if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) { + pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset; + pkt_fmt = MANA_LONG_PKT_FMT; + } else { + pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; + } + + pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; + + if (pkt_fmt == MANA_SHORT_PKT_FMT) + pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob); + else + pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob); + + pkg.wqe_req.inline_oob_data = &pkg.tx_oob; + pkg.wqe_req.flags = 0; + pkg.wqe_req.client_data_unit = 0; + + pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; + WARN_ON_ONCE(pkg.wqe_req.num_sge > 30); + + if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { + pkg.wqe_req.sgl = pkg.sgl_array; + } else { + pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge, + sizeof(struct gdma_sge), + GFP_ATOMIC); + if (!pkg.sgl_ptr) + goto tx_drop_count; + + pkg.wqe_req.sgl = pkg.sgl_ptr; + } + + if (skb->protocol == htons(ETH_P_IP)) + ipv4 = true; + else if (skb->protocol == htons(ETH_P_IPV6)) + ipv6 = true; + + if (skb_is_gso(skb)) { + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; + + pkg.tx_oob.s_oob.comp_iphdr_csum = 1; + pkg.tx_oob.s_oob.comp_tcp_csum = 1; + pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); + + pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size; + pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0; + if (ipv4) { + ip_hdr(skb)->tot_len = 0; + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + } else { + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + } + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + csum_type = mana_checksum_info(skb); + + if (csum_type == IPPROTO_TCP) { + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; + + pkg.tx_oob.s_oob.comp_tcp_csum = 1; + pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); + + } else if (csum_type == IPPROTO_UDP) { + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; + + pkg.tx_oob.s_oob.comp_udp_csum = 1; + } else { + /* Can't do offload of this type of checksum */ + if (skb_checksum_help(skb)) + goto free_sgl_ptr; + } + } + + if (mana_map_skb(skb, apc, &pkg)) + goto free_sgl_ptr; + + skb_queue_tail(&txq->pending_skbs, skb); + + len = skb->len; + net_txq = netdev_get_tx_queue(ndev, txq_idx); + + err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req, + (struct gdma_posted_wqe_info *)skb->cb); + if (!mana_can_tx(gdma_sq)) { + netif_tx_stop_queue(net_txq); + apc->eth_stats.stop_queue++; + } + + if (err) { + (void)skb_dequeue_tail(&txq->pending_skbs); + netdev_warn(ndev, "Failed to post TX OOB: %d\n", err); + err = NETDEV_TX_BUSY; + goto tx_busy; + } + + err = NETDEV_TX_OK; + atomic_inc(&txq->pending_sends); + + mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq); + + /* skb may be freed after mana_gd_post_work_request. Do not use it. */ + skb = NULL; + + tx_stats = &txq->stats; + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->packets++; + tx_stats->bytes += len; + u64_stats_update_end(&tx_stats->syncp); + +tx_busy: + if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) { + netif_tx_wake_queue(net_txq); + apc->eth_stats.wake_queue++; + } + + kfree(pkg.sgl_ptr); + return err; + +free_sgl_ptr: + kfree(pkg.sgl_ptr); +tx_drop_count: + ndev->stats.tx_dropped++; +tx_drop: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static void mana_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *st) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + struct mana_stats *stats; + unsigned int start; + u64 packets, bytes; + int q; + + if (!apc->port_is_up) + return; + + netdev_stats_to_stats64(st, &ndev->stats); + + for (q = 0; q < num_queues; q++) { + stats = &apc->rxqs[q]->stats; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + st->rx_packets += packets; + st->rx_bytes += bytes; + } + + for (q = 0; q < num_queues; q++) { + stats = &apc->tx_qp[q].txq.stats; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + st->tx_packets += packets; + st->tx_bytes += bytes; + } +} + +static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, + int old_q) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 hash = skb_get_hash(skb); + struct sock *sk = skb->sk; + int txq; + + txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; + + if (txq != old_q && sk && sk_fullsock(sk) && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, txq); + + return txq; +} + +static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + int txq; + + if (ndev->real_num_tx_queues == 1) + return 0; + + txq = sk_tx_queue_get(skb->sk); + + if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) { + if (skb_rx_queue_recorded(skb)) + txq = skb_get_rx_queue(skb); + else + txq = mana_get_tx_queue(ndev, skb, txq); + } + + return txq; +} + +static const struct net_device_ops mana_devops = { + .ndo_open = mana_open, + .ndo_stop = mana_close, + .ndo_select_queue = mana_select_queue, + .ndo_start_xmit = mana_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_get_stats64 = mana_get_stats64, +}; + +static void mana_cleanup_port_context(struct mana_port_context *apc) +{ + kfree(apc->rxqs); + apc->rxqs = NULL; +} + +static int mana_init_port_context(struct mana_port_context *apc) +{ + apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), + GFP_KERNEL); + + return !apc->rxqs ? -ENOMEM : 0; +} + +static int mana_send_request(struct mana_context *ac, void *in_buf, + u32 in_len, void *out_buf, u32 out_len) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct gdma_resp_hdr *resp = out_buf; + struct gdma_req_hdr *req = in_buf; + struct device *dev = gc->dev; + static atomic_t activity_id; + int err; + + req->dev_id = gc->mana.dev_id; + req->activity_id = atomic_inc_return(&activity_id); + + err = mana_gd_send_request(gc, in_len, in_buf, out_len, + out_buf); + if (err || resp->status) { + dev_err(dev, "Failed to send mana message: %d, 0x%x\n", + err, resp->status); + return err ? err : -EPROTO; + } + + if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 || + req->activity_id != resp->activity_id) { + dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n", + req->dev_id.as_uint32, resp->dev_id.as_uint32, + req->activity_id, resp->activity_id); + return -EPROTO; + } + + return 0; +} + +static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr, + const enum mana_command_code expected_code, + const u32 min_size) +{ + if (resp_hdr->response.msg_type != expected_code) + return -EPROTO; + + if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1) + return -EPROTO; + + if (resp_hdr->response.msg_size < min_size) + return -EPROTO; + + return 0; +} + +static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, + u32 proto_minor_ver, u32 proto_micro_ver, + u16 *max_num_vports) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct mana_query_device_cfg_resp resp = {}; + struct mana_query_device_cfg_req req = {}; + struct device *dev = gc->dev; + int err = 0; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, + sizeof(req), sizeof(resp)); + req.proto_major_ver = proto_major_ver; + req.proto_minor_ver = proto_minor_ver; + req.proto_micro_ver = proto_micro_ver; + + err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp)); + if (err) { + dev_err(dev, "Failed to query config: %d", err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG, + sizeof(resp)); + if (err || resp.hdr.status) { + dev_err(dev, "Invalid query result: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + return err; + } + + *max_num_vports = resp.max_num_vports; + + return 0; +} + +static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, + u32 *max_sq, u32 *max_rq, u32 *num_indir_entry) +{ + struct mana_query_vport_cfg_resp resp = {}; + struct mana_query_vport_cfg_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG, + sizeof(req), sizeof(resp)); + + req.vport_index = vport_index; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) + return err; + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG, + sizeof(resp)); + if (err) + return err; + + if (resp.hdr.status) + return -EPROTO; + + *max_sq = resp.max_num_sq; + *max_rq = resp.max_num_rq; + *num_indir_entry = resp.num_indirection_ent; + + apc->port_handle = resp.vport; + ether_addr_copy(apc->mac_addr, resp.mac_addr); + + return 0; +} + +static int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, + u32 doorbell_pg_id) +{ + struct mana_config_vport_resp resp = {}; + struct mana_config_vport_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX, + sizeof(req), sizeof(resp)); + req.vport = apc->port_handle; + req.pdid = protection_dom_id; + req.doorbell_pageid = doorbell_pg_id; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err); + goto out; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n", + err, resp.hdr.status); + if (!err) + err = -EPROTO; + + goto out; + } + + apc->tx_shortform_allowed = resp.short_form_allowed; + apc->tx_vp_offset = resp.tx_vport_offset; +out: + return err; +} + +static int mana_cfg_vport_steering(struct mana_port_context *apc, + enum TRI_STATE rx, + bool update_default_rxobj, bool update_key, + bool update_tab) +{ + u16 num_entries = MANA_INDIRECT_TABLE_SIZE; + struct mana_cfg_rx_steer_req *req = NULL; + struct mana_cfg_rx_steer_resp resp = {}; + struct net_device *ndev = apc->ndev; + mana_handle_t *req_indir_tab; + u32 req_buf_size; + int err; + + req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries; + req = kzalloc(req_buf_size, GFP_KERNEL); + if (!req) + return -ENOMEM; + + mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, + sizeof(resp)); + + req->vport = apc->port_handle; + req->num_indir_entries = num_entries; + req->indir_tab_offset = sizeof(*req); + req->rx_enable = rx; + req->rss_enable = apc->rss_state; + req->update_default_rxobj = update_default_rxobj; + req->update_hashkey = update_key; + req->update_indir_tab = update_tab; + req->default_rxobj = apc->default_rxobj; + + if (update_key) + memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); + + if (update_tab) { + req_indir_tab = (mana_handle_t *)(req + 1); + memcpy(req_indir_tab, apc->rxobj_table, + req->num_indir_entries * sizeof(mana_handle_t)); + } + + err = mana_send_request(apc->ac, req, req_buf_size, &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); + goto out; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX, + sizeof(resp)); + if (err) { + netdev_err(ndev, "vPort RX configuration failed: %d\n", err); + goto out; + } + + if (resp.hdr.status) { + netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", + resp.hdr.status); + err = -EPROTO; + } +out: + kfree(req); + return err; +} + +static int mana_create_wq_obj(struct mana_port_context *apc, + mana_handle_t vport, + u32 wq_type, struct mana_obj_spec *wq_spec, + struct mana_obj_spec *cq_spec, + mana_handle_t *wq_obj) +{ + struct mana_create_wqobj_resp resp = {}; + struct mana_create_wqobj_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ, + sizeof(req), sizeof(resp)); + req.vport = vport; + req.wq_type = wq_type; + req.wq_gdma_region = wq_spec->gdma_region; + req.cq_gdma_region = cq_spec->gdma_region; + req.wq_size = wq_spec->queue_size; + req.cq_size = cq_spec->queue_size; + req.cq_moderation_ctx_id = cq_spec->modr_ctx_id; + req.cq_parent_qid = cq_spec->attached_eq; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to create WQ object: %d\n", err); + goto out; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + goto out; + } + + if (resp.wq_obj == INVALID_MANA_HANDLE) { + netdev_err(ndev, "Got an invalid WQ object handle\n"); + err = -EPROTO; + goto out; + } + + *wq_obj = resp.wq_obj; + wq_spec->queue_index = resp.wq_id; + cq_spec->queue_index = resp.cq_id; + + return 0; +out: + return err; +} + +static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, + mana_handle_t wq_obj) +{ + struct mana_destroy_wqobj_resp resp = {}; + struct mana_destroy_wqobj_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ, + sizeof(req), sizeof(resp)); + req.wq_type = wq_type; + req.wq_obj_handle = wq_obj; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); + return; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ, + sizeof(resp)); + if (err || resp.hdr.status) + netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, + resp.hdr.status); +} + +static void mana_init_cqe_poll_buf(struct gdma_comp *cqe_poll_buf) +{ + int i; + + for (i = 0; i < CQE_POLLING_BUFFER; i++) + memset(&cqe_poll_buf[i], 0, sizeof(struct gdma_comp)); +} + +static void mana_destroy_eq(struct gdma_context *gc, + struct mana_port_context *apc) +{ + struct gdma_queue *eq; + int i; + + if (!apc->eqs) + return; + + for (i = 0; i < apc->num_queues; i++) { + eq = apc->eqs[i].eq; + if (!eq) + continue; + + mana_gd_destroy_queue(gc, eq); + } + + kfree(apc->eqs); + apc->eqs = NULL; +} + +static int mana_create_eq(struct mana_port_context *apc) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + struct gdma_queue_spec spec = {}; + int err; + int i; + + apc->eqs = kcalloc(apc->num_queues, sizeof(struct mana_eq), + GFP_KERNEL); + if (!apc->eqs) + return -ENOMEM; + + spec.type = GDMA_EQ; + spec.monitor_avl_buf = false; + spec.queue_size = EQ_SIZE; + spec.eq.callback = NULL; + spec.eq.context = apc->eqs; + spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; + spec.eq.ndev = apc->ndev; + + for (i = 0; i < apc->num_queues; i++) { + mana_init_cqe_poll_buf(apc->eqs[i].cqe_poll); + + err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq); + if (err) + goto out; + } + + return 0; +out: + mana_destroy_eq(gd->gdma_context, apc); + return err; +} + +static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units) +{ + u32 used_space_old; + u32 used_space_new; + + used_space_old = wq->head - wq->tail; + used_space_new = wq->head - (wq->tail + num_units); + + if (WARN_ON_ONCE(used_space_new > used_space_old)) + return -ERANGE; + + wq->tail += num_units; + return 0; +} + +static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) +{ + struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct device *dev = gc->dev; + int i; + + dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); + + for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) + dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], + DMA_TO_DEVICE); +} + +static void mana_poll_tx_cq(struct mana_cq *cq) +{ + struct gdma_queue *gdma_eq = cq->gdma_cq->cq.parent; + struct gdma_comp *completions = cq->gdma_comp_buf; + struct gdma_posted_wqe_info *wqe_info; + unsigned int pkt_transmitted = 0; + unsigned int wqe_unit_cnt = 0; + struct mana_txq *txq = cq->txq; + struct mana_port_context *apc; + struct netdev_queue *net_txq; + struct gdma_queue *gdma_wq; + unsigned int avail_space; + struct net_device *ndev; + struct sk_buff *skb; + bool txq_stopped; + int comp_read; + int i; + + ndev = txq->ndev; + apc = netdev_priv(ndev); + + comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, + CQE_POLLING_BUFFER); + + for (i = 0; i < comp_read; i++) { + struct mana_tx_comp_oob *cqe_oob; + + if (WARN_ON_ONCE(!completions[i].is_sq)) + return; + + cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data; + if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type != + MANA_CQE_COMPLETION)) + return; + + switch (cqe_oob->cqe_hdr.cqe_type) { + case CQE_TX_OKAY: + break; + + case CQE_TX_SA_DROP: + case CQE_TX_MTU_DROP: + case CQE_TX_INVALID_OOB: + case CQE_TX_INVALID_ETH_TYPE: + case CQE_TX_HDR_PROCESSING_ERROR: + case CQE_TX_VF_DISABLED: + case CQE_TX_VPORT_IDX_OUT_OF_RANGE: + case CQE_TX_VPORT_DISABLED: + case CQE_TX_VLAN_TAGGING_VIOLATION: + WARN_ONCE(1, "TX: CQE error %d: ignored.\n", + cqe_oob->cqe_hdr.cqe_type); + break; + + default: + /* If the CQE type is unexpected, log an error, assert, + * and go through the error path. + */ + WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n", + cqe_oob->cqe_hdr.cqe_type); + return; + } + + if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num)) + return; + + skb = skb_dequeue(&txq->pending_skbs); + if (WARN_ON_ONCE(!skb)) + return; + + wqe_info = (struct gdma_posted_wqe_info *)skb->cb; + wqe_unit_cnt += wqe_info->wqe_size_in_bu; + + mana_unmap_skb(skb, apc); + + napi_consume_skb(skb, gdma_eq->eq.budget); + + pkt_transmitted++; + } + + if (WARN_ON_ONCE(wqe_unit_cnt == 0)) + return; + + mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt); + + gdma_wq = txq->gdma_sq; + avail_space = mana_gd_wq_avail_space(gdma_wq); + + /* Ensure tail updated before checking q stop */ + smp_mb(); + + net_txq = txq->net_txq; + txq_stopped = netif_tx_queue_stopped(net_txq); + + /* Ensure checking txq_stopped before apc->port_is_up. */ + smp_rmb(); + + if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) { + netif_tx_wake_queue(net_txq); + apc->eth_stats.wake_queue++; + } + + if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0) + WARN_ON_ONCE(1); +} + +static void mana_post_pkt_rxq(struct mana_rxq *rxq) +{ + struct mana_recv_buf_oob *recv_buf_oob; + u32 curr_index; + int err; + + curr_index = rxq->buf_index++; + if (rxq->buf_index == rxq->num_rx_buf) + rxq->buf_index = 0; + + recv_buf_oob = &rxq->rx_oobs[curr_index]; + + err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req, + &recv_buf_oob->wqe_inf); + if (WARN_ON_ONCE(err)) + return; + + WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1); +} + +static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, + struct mana_rxq *rxq) +{ + struct mana_stats *rx_stats = &rxq->stats; + struct net_device *ndev = rxq->ndev; + uint pkt_len = cqe->ppi[0].pkt_len; + struct mana_port_context *apc; + u16 rxq_idx = rxq->rxq_idx; + struct napi_struct *napi; + struct gdma_queue *eq; + struct sk_buff *skb; + u32 hash_value; + + apc = netdev_priv(ndev); + eq = apc->eqs[rxq_idx].eq; + eq->eq.work_done++; + napi = &eq->eq.napi; + + if (!buf_va) { + ++ndev->stats.rx_dropped; + return; + } + + skb = build_skb(buf_va, PAGE_SIZE); + + if (!skb) { + free_page((unsigned long)buf_va); + ++ndev->stats.rx_dropped; + return; + } + + skb_put(skb, pkt_len); + skb->dev = napi->dev; + + skb->protocol = eth_type_trans(skb, ndev); + skb_checksum_none_assert(skb); + skb_record_rx_queue(skb, rxq_idx); + + if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) { + if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) { + hash_value = cqe->ppi[0].pkt_hash; + + if (cqe->rx_hashtype & MANA_HASH_L4) + skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4); + else + skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); + } + + napi_gro_receive(napi, skb); + + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->packets++; + rx_stats->bytes += pkt_len; + u64_stats_update_end(&rx_stats->syncp); +} + +static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, + struct gdma_comp *cqe) +{ + struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data; + struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; + struct net_device *ndev = rxq->ndev; + struct mana_recv_buf_oob *rxbuf_oob; + struct device *dev = gc->dev; + void *new_buf, *old_buf; + struct page *new_page; + u32 curr, pktlen; + dma_addr_t da; + + switch (oob->cqe_hdr.cqe_type) { + case CQE_RX_OKAY: + break; + + case CQE_RX_TRUNCATED: + netdev_err(ndev, "Dropped a truncated packet\n"); + return; + + case CQE_RX_COALESCED_4: + netdev_err(ndev, "RX coalescing is unsupported\n"); + return; + + case CQE_RX_OBJECT_FENCE: + netdev_err(ndev, "RX Fencing is unsupported\n"); + return; + + default: + netdev_err(ndev, "Unknown RX CQE type = %d\n", + oob->cqe_hdr.cqe_type); + return; + } + + if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY) + return; + + pktlen = oob->ppi[0].pkt_len; + + if (pktlen == 0) { + /* data packets should never have packetlength of zero */ + netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", + rxq->gdma_id, cq->gdma_id, rxq->rxobj); + return; + } + + curr = rxq->buf_index; + rxbuf_oob = &rxq->rx_oobs[curr]; + WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); + + new_page = alloc_page(GFP_ATOMIC); + + if (new_page) { + da = dma_map_page(dev, new_page, 0, rxq->datasize, + DMA_FROM_DEVICE); + + if (dma_mapping_error(dev, da)) { + __free_page(new_page); + new_page = NULL; + } + } + + new_buf = new_page ? page_to_virt(new_page) : NULL; + + if (new_buf) { + dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize, + DMA_FROM_DEVICE); + + old_buf = rxbuf_oob->buf_va; + + /* refresh the rxbuf_oob with the new page */ + rxbuf_oob->buf_va = new_buf; + rxbuf_oob->buf_dma_addr = da; + rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr; + } else { + old_buf = NULL; /* drop the packet if no memory */ + } + + mana_rx_skb(old_buf, oob, rxq); + + mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); + + mana_post_pkt_rxq(rxq); +} + +static void mana_poll_rx_cq(struct mana_cq *cq) +{ + struct gdma_comp *comp = cq->gdma_comp_buf; + u32 comp_read, i; + + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); + WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); + + for (i = 0; i < comp_read; i++) { + if (WARN_ON_ONCE(comp[i].is_sq)) + return; + + /* verify recv cqe references the right rxq */ + if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id)) + return; + + mana_process_rx_cqe(cq->rxq, cq, &comp[i]); + } +} + +static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) +{ + struct mana_cq *cq = context; + + WARN_ON_ONCE(cq->gdma_cq != gdma_queue); + + if (cq->type == MANA_CQ_TYPE_RX) + mana_poll_rx_cq(cq); + else + mana_poll_tx_cq(cq); + + mana_gd_arm_cq(gdma_queue); +} + +static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + + if (!cq->gdma_cq) + return; + + mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq); +} + +static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + + if (!txq->gdma_sq) + return; + + mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq); +} + +static void mana_destroy_txq(struct mana_port_context *apc) +{ + int i; + + if (!apc->tx_qp) + return; + + for (i = 0; i < apc->num_queues; i++) { + mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); + + mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); + + mana_deinit_txq(apc, &apc->tx_qp[i].txq); + } + + kfree(apc->tx_qp); + apc->tx_qp = NULL; +} + +static int mana_create_txq(struct mana_port_context *apc, + struct net_device *net) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + struct mana_obj_spec wq_spec; + struct mana_obj_spec cq_spec; + struct gdma_queue_spec spec; + struct gdma_context *gc; + struct mana_txq *txq; + struct mana_cq *cq; + u32 txq_size; + u32 cq_size; + int err; + int i; + + apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp), + GFP_KERNEL); + if (!apc->tx_qp) + return -ENOMEM; + + /* The minimum size of the WQE is 32 bytes, hence + * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs + * the SQ can store. This value is then used to size other queues + * to prevent overflow. + */ + txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; + BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); + + cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; + cq_size = PAGE_ALIGN(cq_size); + + gc = gd->gdma_context; + + for (i = 0; i < apc->num_queues; i++) { + apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE; + + /* Create SQ */ + txq = &apc->tx_qp[i].txq; + + u64_stats_init(&txq->stats.syncp); + txq->ndev = net; + txq->net_txq = netdev_get_tx_queue(net, i); + txq->vp_offset = apc->tx_vp_offset; + skb_queue_head_init(&txq->pending_skbs); + + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_SQ; + spec.monitor_avl_buf = true; + spec.queue_size = txq_size; + err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq); + if (err) + goto out; + + /* Create SQ's CQ */ + cq = &apc->tx_qp[i].tx_cq; + cq->gdma_comp_buf = apc->eqs[i].cqe_poll; + cq->type = MANA_CQ_TYPE_TX; + + cq->txq = txq; + + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = cq_size; + spec.cq.callback = mana_cq_handler; + spec.cq.parent_eq = apc->eqs[i].eq; + spec.cq.context = cq; + err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); + if (err) + goto out; + + memset(&wq_spec, 0, sizeof(wq_spec)); + memset(&cq_spec, 0, sizeof(cq_spec)); + + wq_spec.gdma_region = txq->gdma_sq->mem_info.gdma_region; + wq_spec.queue_size = txq->gdma_sq->queue_size; + + cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region; + cq_spec.queue_size = cq->gdma_cq->queue_size; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; + + err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ, + &wq_spec, &cq_spec, + &apc->tx_qp[i].tx_object); + + if (err) + goto out; + + txq->gdma_sq->id = wq_spec.queue_index; + cq->gdma_cq->id = cq_spec.queue_index; + + txq->gdma_sq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + + txq->gdma_txq_id = txq->gdma_sq->id; + + cq->gdma_id = cq->gdma_cq->id; + + if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) + return -EINVAL; + + gc->cq_table[cq->gdma_id] = cq->gdma_cq; + + mana_gd_arm_cq(cq->gdma_cq); + } + + return 0; +out: + mana_destroy_txq(apc); + return err; +} + +static void mana_napi_sync_for_rx(struct mana_rxq *rxq) +{ + struct net_device *ndev = rxq->ndev; + struct mana_port_context *apc; + u16 rxq_idx = rxq->rxq_idx; + struct napi_struct *napi; + struct gdma_queue *eq; + + apc = netdev_priv(ndev); + eq = apc->eqs[rxq_idx].eq; + napi = &eq->eq.napi; + + napi_synchronize(napi); +} + +static void mana_destroy_rxq(struct mana_port_context *apc, + struct mana_rxq *rxq, bool validate_state) + +{ + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct mana_recv_buf_oob *rx_oob; + struct device *dev = gc->dev; + int i; + + if (!rxq) + return; + + if (validate_state) + mana_napi_sync_for_rx(rxq); + + mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); + + mana_deinit_cq(apc, &rxq->rx_cq); + + for (i = 0; i < rxq->num_rx_buf; i++) { + rx_oob = &rxq->rx_oobs[i]; + + if (!rx_oob->buf_va) + continue; + + dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize, + DMA_FROM_DEVICE); + + free_page((unsigned long)rx_oob->buf_va); + rx_oob->buf_va = NULL; + } + + if (rxq->gdma_rq) + mana_gd_destroy_queue(gc, rxq->gdma_rq); + + kfree(rxq); +} + +#define MANA_WQE_HEADER_SIZE 16 +#define MANA_WQE_SGE_SIZE 16 + +static int mana_alloc_rx_wqe(struct mana_port_context *apc, + struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size) +{ + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct mana_recv_buf_oob *rx_oob; + struct device *dev = gc->dev; + struct page *page; + dma_addr_t da; + u32 buf_idx; + + WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE); + + *rxq_size = 0; + *cq_size = 0; + + for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { + rx_oob = &rxq->rx_oobs[buf_idx]; + memset(rx_oob, 0, sizeof(*rx_oob)); + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + da = dma_map_page(dev, page, 0, rxq->datasize, DMA_FROM_DEVICE); + + if (dma_mapping_error(dev, da)) { + __free_page(page); + return -ENOMEM; + } + + rx_oob->buf_va = page_to_virt(page); + rx_oob->buf_dma_addr = da; + + rx_oob->num_sge = 1; + rx_oob->sgl[0].address = rx_oob->buf_dma_addr; + rx_oob->sgl[0].size = rxq->datasize; + rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey; + + rx_oob->wqe_req.sgl = rx_oob->sgl; + rx_oob->wqe_req.num_sge = rx_oob->num_sge; + rx_oob->wqe_req.inline_oob_size = 0; + rx_oob->wqe_req.inline_oob_data = NULL; + rx_oob->wqe_req.flags = 0; + rx_oob->wqe_req.client_data_unit = 0; + + *rxq_size += ALIGN(MANA_WQE_HEADER_SIZE + + MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32); + *cq_size += COMP_ENTRY_SIZE; + } + + return 0; +} + +static int mana_push_wqe(struct mana_rxq *rxq) +{ + struct mana_recv_buf_oob *rx_oob; + u32 buf_idx; + int err; + + for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { + rx_oob = &rxq->rx_oobs[buf_idx]; + + err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req, + &rx_oob->wqe_inf); + if (err) + return -ENOSPC; + } + + return 0; +} + +static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + u32 rxq_idx, struct mana_eq *eq, + struct net_device *ndev) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + struct mana_obj_spec wq_spec; + struct mana_obj_spec cq_spec; + struct gdma_queue_spec spec; + struct mana_cq *cq = NULL; + struct gdma_context *gc; + u32 cq_size, rq_size; + struct mana_rxq *rxq; + int err; + + gc = gd->gdma_context; + + rxq = kzalloc(sizeof(*rxq) + + RX_BUFFERS_PER_QUEUE * sizeof(struct mana_recv_buf_oob), + GFP_KERNEL); + if (!rxq) + return NULL; + + rxq->ndev = ndev; + rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; + rxq->rxq_idx = rxq_idx; + rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64); + rxq->rxobj = INVALID_MANA_HANDLE; + + err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); + if (err) + goto out; + + rq_size = PAGE_ALIGN(rq_size); + cq_size = PAGE_ALIGN(cq_size); + + /* Create RQ */ + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_RQ; + spec.monitor_avl_buf = true; + spec.queue_size = rq_size; + err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq); + if (err) + goto out; + + /* Create RQ's CQ */ + cq = &rxq->rx_cq; + cq->gdma_comp_buf = eq->cqe_poll; + cq->type = MANA_CQ_TYPE_RX; + cq->rxq = rxq; + + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = cq_size; + spec.cq.callback = mana_cq_handler; + spec.cq.parent_eq = eq->eq; + spec.cq.context = cq; + err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); + if (err) + goto out; + + memset(&wq_spec, 0, sizeof(wq_spec)); + memset(&cq_spec, 0, sizeof(cq_spec)); + wq_spec.gdma_region = rxq->gdma_rq->mem_info.gdma_region; + wq_spec.queue_size = rxq->gdma_rq->queue_size; + + cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region; + cq_spec.queue_size = cq->gdma_cq->queue_size; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; + + err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ, + &wq_spec, &cq_spec, &rxq->rxobj); + if (err) + goto out; + + rxq->gdma_rq->id = wq_spec.queue_index; + cq->gdma_cq->id = cq_spec.queue_index; + + rxq->gdma_rq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + + rxq->gdma_id = rxq->gdma_rq->id; + cq->gdma_id = cq->gdma_cq->id; + + err = mana_push_wqe(rxq); + if (err) + goto out; + + if (cq->gdma_id >= gc->max_num_cqs) + goto out; + + gc->cq_table[cq->gdma_id] = cq->gdma_cq; + + mana_gd_arm_cq(cq->gdma_cq); +out: + if (!err) + return rxq; + + netdev_err(ndev, "Failed to create RXQ: err = %d\n", err); + + mana_destroy_rxq(apc, rxq, false); + + if (cq) + mana_deinit_cq(apc, cq); + + return NULL; +} + +static int mana_add_rx_queues(struct mana_port_context *apc, + struct net_device *ndev) +{ + struct mana_rxq *rxq; + int err = 0; + int i; + + for (i = 0; i < apc->num_queues; i++) { + rxq = mana_create_rxq(apc, i, &apc->eqs[i], ndev); + if (!rxq) { + err = -ENOMEM; + goto out; + } + + u64_stats_init(&rxq->stats.syncp); + + apc->rxqs[i] = rxq; + } + + apc->default_rxobj = apc->rxqs[0]->rxobj; +out: + return err; +} + +static void mana_destroy_vport(struct mana_port_context *apc) +{ + struct mana_rxq *rxq; + u32 rxq_idx; + + for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { + rxq = apc->rxqs[rxq_idx]; + if (!rxq) + continue; + + mana_destroy_rxq(apc, rxq, true); + apc->rxqs[rxq_idx] = NULL; + } + + mana_destroy_txq(apc); +} + +static int mana_create_vport(struct mana_port_context *apc, + struct net_device *net) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + int err; + + apc->default_rxobj = INVALID_MANA_HANDLE; + + err = mana_cfg_vport(apc, gd->pdid, gd->doorbell); + if (err) + return err; + + return mana_create_txq(apc, net); +} + +static void mana_rss_table_init(struct mana_port_context *apc) +{ + int i; + + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + apc->indir_table[i] = + ethtool_rxfh_indir_default(i, apc->num_queues); +} + +int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, + bool update_hash, bool update_tab) +{ + u32 queue_idx; + int i; + + if (update_tab) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + queue_idx = apc->indir_table[i]; + apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; + } + } + + return mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab); +} + +static int mana_init_port(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 max_txq, max_rxq, max_queues; + int port_idx = apc->port_idx; + u32 num_indirect_entries; + int err; + + err = mana_init_port_context(apc); + if (err) + return err; + + err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, + &num_indirect_entries); + if (err) { + netdev_err(ndev, "Failed to query info for vPort 0\n"); + goto reset_apc; + } + + max_queues = min_t(u32, max_txq, max_rxq); + if (apc->max_queues > max_queues) + apc->max_queues = max_queues; + + if (apc->num_queues > apc->max_queues) + apc->num_queues = apc->max_queues; + + ether_addr_copy(ndev->dev_addr, apc->mac_addr); + + return 0; + +reset_apc: + kfree(apc->rxqs); + apc->rxqs = NULL; + return err; +} + +int mana_alloc_queues(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + struct gdma_dev *gd = apc->ac->gdma_dev; + int err; + + err = mana_create_eq(apc); + if (err) + return err; + + err = mana_create_vport(apc, ndev); + if (err) + goto destroy_eq; + + err = netif_set_real_num_tx_queues(ndev, apc->num_queues); + if (err) + goto destroy_vport; + + err = mana_add_rx_queues(apc, ndev); + if (err) + goto destroy_vport; + + apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; + + err = netif_set_real_num_rx_queues(ndev, apc->num_queues); + if (err) + goto destroy_vport; + + mana_rss_table_init(apc); + + err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); + if (err) + goto destroy_vport; + + return 0; + +destroy_vport: + mana_destroy_vport(apc); +destroy_eq: + mana_destroy_eq(gd->gdma_context, apc); + return err; +} + +int mana_attach(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + ASSERT_RTNL(); + + err = mana_init_port(ndev); + if (err) + return err; + + err = mana_alloc_queues(ndev); + if (err) { + kfree(apc->rxqs); + apc->rxqs = NULL; + return err; + } + + netif_device_attach(ndev); + + apc->port_is_up = apc->port_st_save; + + /* Ensure port state updated before txq state */ + smp_wmb(); + + if (apc->port_is_up) { + netif_carrier_on(ndev); + netif_tx_wake_all_queues(ndev); + } + + return 0; +} + +static int mana_dealloc_queues(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + struct mana_txq *txq; + int i, err; + + if (apc->port_is_up) + return -EINVAL; + + /* No packet can be transmitted now since apc->port_is_up is false. + * There is still a tiny chance that mana_poll_tx_cq() can re-enable + * a txq because it may not timely see apc->port_is_up being cleared + * to false, but it doesn't matter since mana_start_xmit() drops any + * new packets due to apc->port_is_up being false. + * + * Drain all the in-flight TX packets + */ + for (i = 0; i < apc->num_queues; i++) { + txq = &apc->tx_qp[i].txq; + + while (atomic_read(&txq->pending_sends) > 0) + usleep_range(1000, 2000); + } + + /* We're 100% sure the queues can no longer be woken up, because + * we're sure now mana_poll_tx_cq() can't be running. + */ + + apc->rss_state = TRI_STATE_FALSE; + err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); + if (err) { + netdev_err(ndev, "Failed to disable vPort: %d\n", err); + return err; + } + + /* TODO: Implement RX fencing */ + ssleep(1); + + mana_destroy_vport(apc); + + mana_destroy_eq(apc->ac->gdma_dev->gdma_context, apc); + + return 0; +} + +int mana_detach(struct net_device *ndev, bool from_close) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + ASSERT_RTNL(); + + apc->port_st_save = apc->port_is_up; + apc->port_is_up = false; + + /* Ensure port state updated before txq state */ + smp_wmb(); + + netif_tx_disable(ndev); + netif_carrier_off(ndev); + + if (apc->port_st_save) { + err = mana_dealloc_queues(ndev); + if (err) + return err; + } + + if (!from_close) { + netif_device_detach(ndev); + mana_cleanup_port_context(apc); + } + + return 0; +} + +static int mana_probe_port(struct mana_context *ac, int port_idx, + struct net_device **ndev_storage) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct mana_port_context *apc; + struct net_device *ndev; + int err; + + ndev = alloc_etherdev_mq(sizeof(struct mana_port_context), + gc->max_num_queues); + if (!ndev) + return -ENOMEM; + + *ndev_storage = ndev; + + apc = netdev_priv(ndev); + apc->ac = ac; + apc->ndev = ndev; + apc->max_queues = gc->max_num_queues; + apc->num_queues = min_t(uint, gc->max_num_queues, MANA_MAX_NUM_QUEUES); + apc->port_handle = INVALID_MANA_HANDLE; + apc->port_idx = port_idx; + + ndev->netdev_ops = &mana_devops; + ndev->ethtool_ops = &mana_ethtool_ops; + ndev->mtu = ETH_DATA_LEN; + ndev->max_mtu = ndev->mtu; + ndev->min_mtu = ndev->mtu; + ndev->needed_headroom = MANA_HEADROOM; + SET_NETDEV_DEV(ndev, gc->dev); + + netif_carrier_off(ndev); + + netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); + + err = mana_init_port(ndev); + if (err) + goto free_net; + + netdev_lockdep_set_classes(ndev); + + ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + ndev->hw_features |= NETIF_F_RXCSUM; + ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + ndev->hw_features |= NETIF_F_RXHASH; + ndev->features = ndev->hw_features; + ndev->vlan_features = 0; + + err = register_netdev(ndev); + if (err) { + netdev_err(ndev, "Unable to register netdev.\n"); + goto reset_apc; + } + + return 0; + +reset_apc: + kfree(apc->rxqs); + apc->rxqs = NULL; +free_net: + *ndev_storage = NULL; + netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); + free_netdev(ndev); + return err; +} + +int mana_probe(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct device *dev = gc->dev; + struct mana_context *ac; + int err; + int i; + + dev_info(dev, + "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n", + MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION); + + err = mana_gd_register_device(gd); + if (err) + return err; + + ac = kzalloc(sizeof(*ac), GFP_KERNEL); + if (!ac) + return -ENOMEM; + + ac->gdma_dev = gd; + ac->num_ports = 1; + gd->driver_data = ac; + + err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, + MANA_MICRO_VERSION, &ac->num_ports); + if (err) + goto out; + + if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) + ac->num_ports = MAX_PORTS_IN_MANA_DEV; + + for (i = 0; i < ac->num_ports; i++) { + err = mana_probe_port(ac, i, &ac->ports[i]); + if (err) + break; + } +out: + if (err) + mana_remove(gd); + + return err; +} + +void mana_remove(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct mana_context *ac = gd->driver_data; + struct device *dev = gc->dev; + struct net_device *ndev; + int i; + + for (i = 0; i < ac->num_ports; i++) { + ndev = ac->ports[i]; + if (!ndev) { + if (i == 0) + dev_err(dev, "No net device to remove\n"); + goto out; + } + + /* All cleanup actions should stay after rtnl_lock(), otherwise + * other functions may access partially cleaned up data. + */ + rtnl_lock(); + + mana_detach(ndev, false); + + unregister_netdevice(ndev); + + rtnl_unlock(); + + free_netdev(ndev); + } +out: + mana_gd_deregister_device(gd); + gd->driver_data = NULL; + gd->gdma_context = NULL; + kfree(ac); +} diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c new file mode 100644 index 000000000000..7e74339f39ae --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> + +#include "mana.h" + +static const struct { + char name[ETH_GSTRING_LEN]; + u16 offset; +} mana_eth_stats[] = { + {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, + {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, +}; + +static int mana_get_sset_count(struct net_device *ndev, int stringset) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + + if (stringset != ETH_SS_STATS) + return -EINVAL; + + return ARRAY_SIZE(mana_eth_stats) + num_queues * 4; +} + +static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + u8 *p = data; + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) { + memcpy(p, mana_eth_stats[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < num_queues; i++) { + sprintf(p, "rx_%d_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_bytes", i); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < num_queues; i++) { + sprintf(p, "tx_%d_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_bytes", i); + p += ETH_GSTRING_LEN; + } +} + +static void mana_get_ethtool_stats(struct net_device *ndev, + struct ethtool_stats *e_stats, u64 *data) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + void *eth_stats = &apc->eth_stats; + struct mana_stats *stats; + unsigned int start; + u64 packets, bytes; + int q, i = 0; + + if (!apc->port_is_up) + return; + + for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++) + data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); + + for (q = 0; q < num_queues; q++) { + stats = &apc->rxqs[q]->stats; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + data[i++] = packets; + data[i++] = bytes; + } + + for (q = 0; q < num_queues; q++) { + stats = &apc->tx_qp[q].txq.stats; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + data[i++] = packets; + data[i++] = bytes; + } +} + +static int mana_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *cmd, + u32 *rules) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = apc->num_queues; + return 0; + } + + return -EOPNOTSUPP; +} + +static u32 mana_get_rxfh_key_size(struct net_device *ndev) +{ + return MANA_HASH_KEY_SIZE; +} + +static u32 mana_rss_indir_size(struct net_device *ndev) +{ + return MANA_INDIRECT_TABLE_SIZE; +} + +static int mana_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int i; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ + + if (indir) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + indir[i] = apc->indir_table[i]; + } + + if (key) + memcpy(key, apc->hashkey, MANA_HASH_KEY_SIZE); + + return 0; +} + +static int mana_set_rxfh(struct net_device *ndev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct mana_port_context *apc = netdev_priv(ndev); + bool update_hash = false, update_table = false; + u32 save_table[MANA_INDIRECT_TABLE_SIZE]; + u8 save_key[MANA_HASH_KEY_SIZE]; + int i, err; + + if (!apc->port_is_up) + return -EOPNOTSUPP; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + if (indir) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + if (indir[i] >= apc->num_queues) + return -EINVAL; + + update_table = true; + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + save_table[i] = apc->indir_table[i]; + apc->indir_table[i] = indir[i]; + } + } + + if (key) { + update_hash = true; + memcpy(save_key, apc->hashkey, MANA_HASH_KEY_SIZE); + memcpy(apc->hashkey, key, MANA_HASH_KEY_SIZE); + } + + err = mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); + + if (err) { /* recover to original values */ + if (update_table) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + apc->indir_table[i] = save_table[i]; + } + + if (update_hash) + memcpy(apc->hashkey, save_key, MANA_HASH_KEY_SIZE); + + mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); + } + + return err; +} + +static void mana_get_channels(struct net_device *ndev, + struct ethtool_channels *channel) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + channel->max_combined = apc->max_queues; + channel->combined_count = apc->num_queues; +} + +static int mana_set_channels(struct net_device *ndev, + struct ethtool_channels *channels) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int new_count = channels->combined_count; + unsigned int old_count = apc->num_queues; + int err, err2; + + if (!apc->port_is_up) + return -EOPNOTSUPP; + + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, "mana_detach failed: %d\n", err); + return err; + } + + apc->num_queues = new_count; + err = mana_attach(ndev); + if (!err) + return 0; + + netdev_err(ndev, "mana_attach failed: %d\n", err); + + /* Try to roll it back to the old configuration. */ + apc->num_queues = old_count; + err2 = mana_attach(ndev); + if (err2) + netdev_err(ndev, "mana re-attach failed: %d\n", err2); + + return err; +} + +const struct ethtool_ops mana_ethtool_ops = { + .get_ethtool_stats = mana_get_ethtool_stats, + .get_sset_count = mana_get_sset_count, + .get_strings = mana_get_strings, + .get_rxnfc = mana_get_rxnfc, + .get_rxfh_key_size = mana_get_rxfh_key_size, + .get_rxfh_indir_size = mana_rss_indir_size, + .get_rxfh = mana_get_rxfh, + .set_rxfh = mana_set_rxfh, + .get_channels = mana_get_channels, + .set_channels = mana_set_channels, +}; diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c new file mode 100644 index 000000000000..da255da62176 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/mm.h> + +#include "shm_channel.h" + +#define PAGE_FRAME_L48_WIDTH_BYTES 6 +#define PAGE_FRAME_L48_WIDTH_BITS (PAGE_FRAME_L48_WIDTH_BYTES * 8) +#define PAGE_FRAME_L48_MASK 0x0000FFFFFFFFFFFF +#define PAGE_FRAME_H4_WIDTH_BITS 4 +#define VECTOR_MASK 0xFFFF +#define SHMEM_VF_RESET_STATE ((u32)-1) + +#define SMC_MSG_TYPE_ESTABLISH_HWC 1 +#define SMC_MSG_TYPE_ESTABLISH_HWC_VERSION 0 + +#define SMC_MSG_TYPE_DESTROY_HWC 2 +#define SMC_MSG_TYPE_DESTROY_HWC_VERSION 0 + +#define SMC_MSG_DIRECTION_REQUEST 0 +#define SMC_MSG_DIRECTION_RESPONSE 1 + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +/* Shared memory channel protocol header + * + * msg_type: set on request and response; response matches request. + * msg_version: newer PF writes back older response (matching request) + * older PF acts on latest version known and sets that version in result + * (less than request). + * direction: 0 for request, VF->PF; 1 for response, PF->VF. + * status: 0 on request, + * operation result on response (success = 0, failure = 1 or greater). + * reset_vf: If set on either establish or destroy request, indicates perform + * FLR before/after the operation. + * owner_is_pf: 1 indicates PF owned, 0 indicates VF owned. + */ +union smc_proto_hdr { + u32 as_uint32; + + struct { + u8 msg_type : 3; + u8 msg_version : 3; + u8 reserved_1 : 1; + u8 direction : 1; + + u8 status; + + u8 reserved_2; + + u8 reset_vf : 1; + u8 reserved_3 : 6; + u8 owner_is_pf : 1; + }; +}; /* HW DATA */ + +#define SMC_APERTURE_BITS 256 +#define SMC_BASIC_UNIT (sizeof(u32)) +#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) +#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) + +static int mana_smc_poll_register(void __iomem *base, bool reset) +{ + void __iomem *ptr = base + SMC_LAST_DWORD * SMC_BASIC_UNIT; + u32 last_dword; + int i; + + /* Poll the hardware for the ownership bit. This should be pretty fast, + * but let's do it in a loop just in case the hardware or the PF + * driver are temporarily busy. + */ + for (i = 0; i < 20 * 1000; i++) { + last_dword = readl(ptr); + + /* shmem reads as 0xFFFFFFFF in the reset case */ + if (reset && last_dword == SHMEM_VF_RESET_STATE) + return 0; + + /* If bit_31 is set, the PF currently owns the SMC. */ + if (!(last_dword & BIT(31))) + return 0; + + usleep_range(1000, 2000); + } + + return -ETIMEDOUT; +} + +static int mana_smc_read_response(struct shm_channel *sc, u32 msg_type, + u32 msg_version, bool reset_vf) +{ + void __iomem *base = sc->base; + union smc_proto_hdr hdr; + int err; + + /* Wait for PF to respond. */ + err = mana_smc_poll_register(base, reset_vf); + if (err) + return err; + + hdr.as_uint32 = readl(base + SMC_LAST_DWORD * SMC_BASIC_UNIT); + + if (reset_vf && hdr.as_uint32 == SHMEM_VF_RESET_STATE) + return 0; + + /* Validate protocol fields from the PF driver */ + if (hdr.msg_type != msg_type || hdr.msg_version > msg_version || + hdr.direction != SMC_MSG_DIRECTION_RESPONSE) { + dev_err(sc->dev, "Wrong SMC response 0x%x, type=%d, ver=%d\n", + hdr.as_uint32, msg_type, msg_version); + return -EPROTO; + } + + /* Validate the operation result */ + if (hdr.status != 0) { + dev_err(sc->dev, "SMC operation failed: 0x%x\n", hdr.status); + return -EPROTO; + } + + return 0; +} + +void mana_smc_init(struct shm_channel *sc, struct device *dev, + void __iomem *base) +{ + sc->dev = dev; + sc->base = base; +} + +int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, + u64 cq_addr, u64 rq_addr, u64 sq_addr, + u32 eq_msix_index) +{ + union smc_proto_hdr *hdr; + u16 all_addr_h4bits = 0; + u16 frame_addr_seq = 0; + u64 frame_addr = 0; + u8 shm_buf[32]; + u64 *shmem; + u32 *dword; + u8 *ptr; + int err; + int i; + + /* Ensure VF already has possession of shared memory */ + err = mana_smc_poll_register(sc->base, false); + if (err) { + dev_err(sc->dev, "Timeout when setting up HWC: %d\n", err); + return err; + } + + if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) || + !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr)) + return -EINVAL; + + if ((eq_msix_index & VECTOR_MASK) != eq_msix_index) + return -EINVAL; + + /* Scheme for packing four addresses and extra info into 256 bits. + * + * Addresses must be page frame aligned, so only frame address bits + * are transferred. + * + * 52-bit frame addresses are split into the lower 48 bits and upper + * 4 bits. Lower 48 bits of 4 address are written sequentially from + * the start of the 256-bit shared memory region followed by 16 bits + * containing the upper 4 bits of the 4 addresses in sequence. + * + * A 16 bit EQ vector number fills out the next-to-last 32-bit dword. + * + * The final 32-bit dword is used for protocol control information as + * defined in smc_proto_hdr. + */ + + memset(shm_buf, 0, sizeof(shm_buf)); + ptr = shm_buf; + + /* EQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(eq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* CQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(cq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* RQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(rq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* SQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(sq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* High 4 bits of the four frame addresses */ + *((u16 *)ptr) = all_addr_h4bits; + ptr += sizeof(u16); + + /* EQ MSIX vector number */ + *((u16 *)ptr) = (u16)eq_msix_index; + ptr += sizeof(u16); + + /* 32-bit protocol header in final dword */ + *((u32 *)ptr) = 0; + + hdr = (union smc_proto_hdr *)ptr; + hdr->msg_type = SMC_MSG_TYPE_ESTABLISH_HWC; + hdr->msg_version = SMC_MSG_TYPE_ESTABLISH_HWC_VERSION; + hdr->direction = SMC_MSG_DIRECTION_REQUEST; + hdr->reset_vf = reset_vf; + + /* Write 256-message buffer to shared memory (final 32-bit write + * triggers HW to set possession bit to PF). + */ + dword = (u32 *)shm_buf; + for (i = 0; i < SMC_APERTURE_DWORDS; i++) + writel(*dword++, sc->base + i * SMC_BASIC_UNIT); + + /* Read shmem response (polling for VF possession) and validate. + * For setup, waiting for response on shared memory is not strictly + * necessary, since wait occurs later for results to appear in EQE's. + */ + err = mana_smc_read_response(sc, SMC_MSG_TYPE_ESTABLISH_HWC, + SMC_MSG_TYPE_ESTABLISH_HWC_VERSION, + reset_vf); + if (err) { + dev_err(sc->dev, "Error when setting up HWC: %d\n", err); + return err; + } + + return 0; +} + +int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf) +{ + union smc_proto_hdr hdr = {}; + int err; + + /* Ensure already has possession of shared memory */ + err = mana_smc_poll_register(sc->base, false); + if (err) { + dev_err(sc->dev, "Timeout when tearing down HWC\n"); + return err; + } + + /* Set up protocol header for HWC destroy message */ + hdr.msg_type = SMC_MSG_TYPE_DESTROY_HWC; + hdr.msg_version = SMC_MSG_TYPE_DESTROY_HWC_VERSION; + hdr.direction = SMC_MSG_DIRECTION_REQUEST; + hdr.reset_vf = reset_vf; + + /* Write message in high 32 bits of 256-bit shared memory, causing HW + * to set possession bit to PF. + */ + writel(hdr.as_uint32, sc->base + SMC_LAST_DWORD * SMC_BASIC_UNIT); + + /* Read shmem response (polling for VF possession) and validate. + * For teardown, waiting for response is required to ensure hardware + * invalidates MST entries before software frees memory. + */ + err = mana_smc_read_response(sc, SMC_MSG_TYPE_DESTROY_HWC, + SMC_MSG_TYPE_DESTROY_HWC_VERSION, + reset_vf); + if (err) { + dev_err(sc->dev, "Error when tearing down HWC: %d\n", err); + return err; + } + + return 0; +} diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.h b/drivers/net/ethernet/microsoft/mana/shm_channel.h new file mode 100644 index 000000000000..5199b41497ff --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _SHM_CHANNEL_H +#define _SHM_CHANNEL_H + +struct shm_channel { + struct device *dev; + void __iomem *base; +}; + +void mana_smc_init(struct shm_channel *sc, struct device *dev, + void __iomem *base); + +int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, + u64 cq_addr, u64 rq_addr, u64 sq_addr, + u32 eq_msix_index); + +int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf); + +#endif /* _SHM_CHANNEL_H */ diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 1634ca6d4a8f..c84c8bf2bc20 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2897,7 +2897,7 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, dev_kfree_skb_any(curr); if (segs != NULL) { curr = segs; - segs = segs->next; + segs = next; curr->next = NULL; dev_kfree_skb_any(segs); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index 0e2db6ea79e9..2ec62c8d86e1 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -454,6 +454,7 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) dev_consume_skb_any(skb); else dev_kfree_skb_any(skb); + return; } nfp_ccm_rx(&bpf->ccm, skb); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index e13e26e72ca0..31377923ea3d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -192,6 +192,7 @@ struct nfp_fl_internal_ports { * @qos_rate_limiters: Current active qos rate limiters * @qos_stats_lock: Lock on qos stats updates * @pre_tun_rule_cnt: Number of pre-tunnel rules offloaded + * @merge_table: Hash table to store merged flows */ struct nfp_flower_priv { struct nfp_app *app; @@ -225,6 +226,7 @@ struct nfp_flower_priv { unsigned int qos_rate_limiters; spinlock_t qos_stats_lock; /* Protect the qos stats */ int pre_tun_rule_cnt; + struct rhashtable merge_table; }; /** @@ -352,6 +354,12 @@ struct nfp_fl_payload_link { }; extern const struct rhashtable_params nfp_flower_table_params; +extern const struct rhashtable_params merge_table_params; + +struct nfp_merge_info { + u64 parent_ctx; + struct rhash_head ht_node; +}; struct nfp_fl_stats_frame { __be32 stats_con_id; diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index aa06fcb38f8b..327bb56b3ef5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -490,6 +490,12 @@ const struct rhashtable_params nfp_flower_table_params = { .automatic_shrinking = true, }; +const struct rhashtable_params merge_table_params = { + .key_offset = offsetof(struct nfp_merge_info, parent_ctx), + .head_offset = offsetof(struct nfp_merge_info, ht_node), + .key_len = sizeof(u64), +}; + int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, unsigned int host_num_mems) { @@ -506,6 +512,10 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, if (err) goto err_free_flow_table; + err = rhashtable_init(&priv->merge_table, &merge_table_params); + if (err) + goto err_free_stats_ctx_table; + get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed)); /* Init ring buffer and unallocated mask_ids. */ @@ -513,7 +523,7 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL); if (!priv->mask_ids.mask_id_free_list.buf) - goto err_free_stats_ctx_table; + goto err_free_merge_table; priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1; @@ -550,6 +560,8 @@ err_free_last_used: kfree(priv->mask_ids.last_used); err_free_mask_id: kfree(priv->mask_ids.mask_id_free_list.buf); +err_free_merge_table: + rhashtable_destroy(&priv->merge_table); err_free_stats_ctx_table: rhashtable_destroy(&priv->stats_ctx_table); err_free_flow_table: @@ -568,6 +580,8 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app) nfp_check_rhashtable_empty, NULL); rhashtable_free_and_destroy(&priv->stats_ctx_table, nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&priv->merge_table, + nfp_check_rhashtable_empty, NULL); kvfree(priv->stats); kfree(priv->mask_ids.mask_id_free_list.buf); kfree(priv->mask_ids.last_used); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index d72225d64a75..e95969c462e4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1009,6 +1009,8 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *merge_flow; struct nfp_fl_key_ls merge_key_ls; + struct nfp_merge_info *merge_info; + u64 parent_ctx = 0; int err; ASSERT_RTNL(); @@ -1019,6 +1021,15 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, nfp_flower_is_merge_flow(sub_flow2)) return -EINVAL; + /* check if the two flows are already merged */ + parent_ctx = (u64)(be32_to_cpu(sub_flow1->meta.host_ctx_id)) << 32; + parent_ctx |= (u64)(be32_to_cpu(sub_flow2->meta.host_ctx_id)); + if (rhashtable_lookup_fast(&priv->merge_table, + &parent_ctx, merge_table_params)) { + nfp_flower_cmsg_warn(app, "The two flows are already merged.\n"); + return 0; + } + err = nfp_flower_can_merge(sub_flow1, sub_flow2); if (err) return err; @@ -1060,16 +1071,33 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, if (err) goto err_release_metadata; + merge_info = kmalloc(sizeof(*merge_info), GFP_KERNEL); + if (!merge_info) { + err = -ENOMEM; + goto err_remove_rhash; + } + merge_info->parent_ctx = parent_ctx; + err = rhashtable_insert_fast(&priv->merge_table, &merge_info->ht_node, + merge_table_params); + if (err) + goto err_destroy_merge_info; + err = nfp_flower_xmit_flow(app, merge_flow, NFP_FLOWER_CMSG_TYPE_FLOW_MOD); if (err) - goto err_remove_rhash; + goto err_remove_merge_info; merge_flow->in_hw = true; sub_flow1->in_hw = false; return 0; +err_remove_merge_info: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->merge_table, + &merge_info->ht_node, + merge_table_params)); +err_destroy_merge_info: + kfree(merge_info); err_remove_rhash: WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, &merge_flow->fl_node, @@ -1359,7 +1387,9 @@ nfp_flower_remove_merge_flow(struct nfp_app *app, { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_payload_link *link, *temp; + struct nfp_merge_info *merge_info; struct nfp_fl_payload *origin; + u64 parent_ctx = 0; bool mod = false; int err; @@ -1396,8 +1426,22 @@ nfp_flower_remove_merge_flow(struct nfp_app *app, err_free_links: /* Clean any links connected with the merged flow. */ list_for_each_entry_safe(link, temp, &merge_flow->linked_flows, - merge_flow.list) + merge_flow.list) { + u32 ctx_id = be32_to_cpu(link->sub_flow.flow->meta.host_ctx_id); + + parent_ctx = (parent_ctx << 32) | (u64)(ctx_id); nfp_flower_unlink_flow(link); + } + + merge_info = rhashtable_lookup_fast(&priv->merge_table, + &parent_ctx, + merge_table_params); + if (merge_info) { + WARN_ON_ONCE(rhashtable_remove_fast(&priv->merge_table, + &merge_info->ht_node, + merge_table_params)); + kfree(merge_info); + } kfree(merge_flow->action_data); kfree(merge_flow->mask_data); diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index e72fd33a214c..64c6842bd452 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1350,9 +1350,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) __lpc_get_mac(pldat, ndev->dev_addr); if (!is_valid_ether_addr(ndev->dev_addr)) { - const char *macaddr = of_get_mac_address(np); - if (!IS_ERR(macaddr)) - ether_addr_copy(ndev->dev_addr, macaddr); + of_get_mac_address(np, ndev->dev_addr); } if (!is_valid_ether_addr(ndev->dev_addr)) eth_hw_addr_random(ndev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 71db1e2c7d8a..6583be570e45 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -888,55 +888,55 @@ static int ionic_get_ts_info(struct net_device *netdev, mask = cpu_to_le64(IONIC_PKT_CLS_NTP_ALL); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_NTP_ALL; + info->rx_filters |= BIT(HWTSTAMP_FILTER_NTP_ALL); mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_SYNC); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V1_L4_SYNC; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC); mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_DREQ); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ); mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_ALL); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT); mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_SYNC); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L4_SYNC; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC); mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_DREQ); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ); mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_ALL); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT); mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_SYNC); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L2_SYNC; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC); mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_DREQ); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ); mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_ALL); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT); mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_SYNC); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_SYNC; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_SYNC); mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_DREQ); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_DELAY_REQ; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ); mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_ALL); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) - info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_EVENT; + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_EVENT); return 0; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index ee56fed12e07..af3a5368529c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -135,7 +135,7 @@ static void ionic_link_status_check(struct ionic_lif *lif) if (netdev->flags & IFF_UP && netif_running(netdev)) { mutex_lock(&lif->queue_lock); err = ionic_start_queues(lif); - if (err) { + if (err && err != -EBUSY) { netdev_err(lif->netdev, "Failed to start queues: %d\n", err); set_bit(IONIC_LIF_F_BROKEN, lif->state); @@ -2015,9 +2015,8 @@ static void ionic_txrx_free(struct ionic_lif *lif) static int ionic_txrx_alloc(struct ionic_lif *lif) { - unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz; - unsigned int flags; - unsigned int i; + unsigned int comp_sz, desc_sz, num_desc, sg_desc_sz; + unsigned int flags, i; int err = 0; num_desc = lif->ntxq_descs; @@ -2584,12 +2583,11 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b) int ionic_reconfigure_queues(struct ionic_lif *lif, struct ionic_queue_params *qparam) { - unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz; + unsigned int comp_sz, desc_sz, num_desc, sg_desc_sz; struct ionic_qcq **tx_qcqs = NULL; struct ionic_qcq **rx_qcqs = NULL; - unsigned int flags; + unsigned int flags, i; int err = -ENOMEM; - unsigned int i; /* allocate temporary qcq arrays to hold new queue structs */ if (qparam->nxqs != lif->nxqs || qparam->ntxq_descs != lif->ntxq_descs) { @@ -2989,14 +2987,14 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) goto err_txrx_free; } - /* restore the hardware timestamping queues */ - ionic_lif_hwstamp_set(lif, NULL); - clear_bit(IONIC_LIF_F_FW_RESET, lif->state); ionic_link_status_check_request(lif, CAN_SLEEP); netif_device_attach(lif->netdev); dev_info(ionic->dev, "FW Up: LIFs restarted\n"); + /* restore the hardware timestamping queues */ + ionic_lif_hwstamp_replay(lif); + return; err_txrx_free: diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index ea3b086af179..346506f01715 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -302,6 +302,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type, int ionic_lif_size(struct ionic *ionic); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +int ionic_lif_hwstamp_replay(struct ionic_lif *lif); int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr); int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr); ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter); @@ -310,6 +311,11 @@ void ionic_lif_unregister_phc(struct ionic_lif *lif); void ionic_lif_alloc_phc(struct ionic_lif *lif); void ionic_lif_free_phc(struct ionic_lif *lif); #else +static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif) +{ + return -EOPNOTSUPP; +} + static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) { return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c index 86ae5011ac9b..a87c87e86aef 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c @@ -18,10 +18,8 @@ static int ionic_hwstamp_tx_mode(int config_tx_type) return IONIC_TXSTAMP_ON; case HWTSTAMP_TX_ONESTEP_SYNC: return IONIC_TXSTAMP_ONESTEP_SYNC; -#ifdef HAVE_HWSTAMP_TX_ONESTEP_P2P case HWTSTAMP_TX_ONESTEP_P2P: return IONIC_TXSTAMP_ONESTEP_P2P; -#endif default: return -ERANGE; } @@ -66,10 +64,12 @@ static u64 ionic_hwstamp_rx_filt(int config_rx_filter) } } -int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) +static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif, + struct hwtstamp_config *new_ts) { struct ionic *ionic = lif->ionic; - struct hwtstamp_config config; + struct hwtstamp_config *config; + struct hwtstamp_config ts; int tx_mode = 0; u64 rx_filt = 0; int err, err2; @@ -79,39 +79,48 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) if (!lif->phc || !lif->phc->ptp) return -EOPNOTSUPP; - if (ifr) { - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; + mutex_lock(&lif->phc->config_lock); + + if (new_ts) { + config = new_ts; } else { - /* if called with ifr == NULL, behave as if called with the - * current ts_config from the initial cleared state. + /* If called with new_ts == NULL, replay the previous request + * primarily for recovery after a FW_RESET. + * We saved the previous configuration request info, so copy + * the previous request for reference, clear the current state + * to match the device's reset state, and run with it. */ - memcpy(&config, &lif->phc->ts_config, sizeof(config)); - memset(&lif->phc->ts_config, 0, sizeof(config)); + config = &ts; + memcpy(config, &lif->phc->ts_config, sizeof(*config)); + memset(&lif->phc->ts_config, 0, sizeof(lif->phc->ts_config)); + lif->phc->ts_config_tx_mode = 0; + lif->phc->ts_config_rx_filt = 0; } - tx_mode = ionic_hwstamp_tx_mode(config.tx_type); - if (tx_mode < 0) - return tx_mode; + tx_mode = ionic_hwstamp_tx_mode(config->tx_type); + if (tx_mode < 0) { + err = tx_mode; + goto err_queues; + } mask = cpu_to_le64(BIT_ULL(tx_mode)); - if ((ionic->ident.lif.eth.hwstamp_tx_modes & mask) != mask) - return -ERANGE; + if ((ionic->ident.lif.eth.hwstamp_tx_modes & mask) != mask) { + err = -ERANGE; + goto err_queues; + } - rx_filt = ionic_hwstamp_rx_filt(config.rx_filter); - rx_all = config.rx_filter != HWTSTAMP_FILTER_NONE && !rx_filt; + rx_filt = ionic_hwstamp_rx_filt(config->rx_filter); + rx_all = config->rx_filter != HWTSTAMP_FILTER_NONE && !rx_filt; mask = cpu_to_le64(rx_filt); if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) != mask) { rx_filt = 0; rx_all = true; - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; } dev_dbg(ionic->dev, "config_rx_filter %d rx_filt %#llx rx_all %d\n", - config.rx_filter, rx_filt, rx_all); - - mutex_lock(&lif->phc->config_lock); + config->rx_filter, rx_filt, rx_all); if (tx_mode) { err = ionic_lif_create_hwstamp_txq(lif); @@ -143,15 +152,7 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) goto err_rxall; } - if (ifr) { - err = copy_to_user(ifr->ifr_data, &config, sizeof(config)); - if (err) { - err = -EFAULT; - goto err_final; - } - } - - memcpy(&lif->phc->ts_config, &config, sizeof(config)); + memcpy(&lif->phc->ts_config, config, sizeof(*config)); lif->phc->ts_config_rx_filt = rx_filt; lif->phc->ts_config_tx_mode = tx_mode; @@ -159,14 +160,6 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) return 0; -err_final: - if (rx_all != (lif->phc->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)) { - rx_all = lif->phc->ts_config.rx_filter == HWTSTAMP_FILTER_ALL; - err2 = ionic_lif_config_hwstamp_rxq_all(lif, rx_all); - if (err2) - dev_err(ionic->dev, - "Failed to revert all-rxq timestamp config: %d\n", err2); - } err_rxall: if (rx_filt != lif->phc->ts_config_rx_filt) { rx_filt = lif->phc->ts_config_rx_filt; @@ -190,6 +183,37 @@ err_queues: return err; } +int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = ionic_lif_hwstamp_set_ts_config(lif, &config); + if (err) { + netdev_info(lif->netdev, "hwstamp set failed: %d\n", err); + return err; + } + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +int ionic_lif_hwstamp_replay(struct ionic_lif *lif) +{ + int err; + + err = ionic_lif_hwstamp_set_ts_config(lif, NULL); + if (err) + netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err); + + return err; +} + int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) { struct hwtstamp_config config; @@ -201,7 +225,9 @@ int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) memcpy(&config, &lif->phc->ts_config, sizeof(config)); mutex_unlock(&lif->phc->config_lock); - return copy_to_user(ifr->ifr_data, &config, sizeof(config)); + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + return 0; } static u64 ionic_hwstamp_read(struct ionic *ionic, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 3478b0f2495f..08934888575c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -1203,6 +1203,7 @@ static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb, if (unlikely(!ionic_q_has_space(q, ndescs))) goto err_out_drop; + skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP; if (skb_is_gso(skb)) err = ionic_tx_tso(q, skb); else @@ -1233,7 +1234,7 @@ netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev) } if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) - if (lif->hwstamp_txq) + if (lif->hwstamp_txq && lif->phc->ts_config_tx_mode) return ionic_start_hwstamp_xmit(skb, netdev); if (unlikely(queue_index >= lif->nxqs)) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 5a3b65a6eb4f..ab9b02574a15 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -885,7 +885,7 @@ qca_spi_probe(struct spi_device *spi) struct net_device *qcaspi_devs = NULL; u8 legacy_mode = 0; u16 signature; - const char *mac; + int ret; if (!spi->dev.of_node) { dev_err(&spi->dev, "Missing device tree\n"); @@ -962,12 +962,8 @@ qca_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, qcaspi_devs); - mac = of_get_mac_address(spi->dev.of_node); - - if (!IS_ERR(mac)) - ether_addr_copy(qca->net_dev->dev_addr, mac); - - if (!is_valid_ether_addr(qca->net_dev->dev_addr)) { + ret = of_get_mac_address(spi->dev.of_node, qca->net_dev->dev_addr); + if (ret) { eth_hw_addr_random(qca->net_dev); dev_info(&spi->dev, "Using random MAC address: %pM\n", qca->net_dev->dev_addr); diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 362b4f5c162c..bcdeca7b3366 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -323,7 +323,6 @@ static int qca_uart_probe(struct serdev_device *serdev) { struct net_device *qcauart_dev = alloc_etherdev(sizeof(struct qcauart)); struct qcauart *qca; - const char *mac; u32 speed = 115200; int ret; @@ -348,12 +347,8 @@ static int qca_uart_probe(struct serdev_device *serdev) of_property_read_u32(serdev->dev.of_node, "current-speed", &speed); - mac = of_get_mac_address(serdev->dev.of_node); - - if (!IS_ERR(mac)) - ether_addr_copy(qca->net_dev->dev_addr, mac); - - if (!is_valid_ether_addr(qca->net_dev->dev_addr)) { + ret = of_get_mac_address(serdev->dev.of_node, qca->net_dev->dev_addr); + if (ret) { eth_hw_addr_random(qca->net_dev); dev_info(&serdev->dev, "Using random MAC address: %pM\n", qca->net_dev->dev_addr); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 1cd5c6f6d44f..3e86fbe21431 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1910,6 +1910,32 @@ static void rtl8169_get_ringparam(struct net_device *dev, data->tx_pending = NUM_TX_DESC; } +static void rtl8169_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *data) +{ + struct rtl8169_private *tp = netdev_priv(dev); + bool tx_pause, rx_pause; + + phy_get_pause(tp->phydev, &tx_pause, &rx_pause); + + data->autoneg = tp->phydev->autoneg; + data->tx_pause = tx_pause ? 1 : 0; + data->rx_pause = rx_pause ? 1 : 0; +} + +static int rtl8169_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *data) +{ + struct rtl8169_private *tp = netdev_priv(dev); + + if (dev->mtu > ETH_DATA_LEN) + return -EOPNOTSUPP; + + phy_set_asym_pause(tp->phydev, data->rx_pause, data->tx_pause); + + return 0; +} + static const struct ethtool_ops rtl8169_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@ -1931,6 +1957,8 @@ static const struct ethtool_ops rtl8169_ethtool_ops = { .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_ringparam = rtl8169_get_ringparam, + .get_pauseparam = rtl8169_get_pauseparam, + .set_pauseparam = rtl8169_set_pauseparam, }; static void rtl_enable_eee(struct rtl8169_private *tp) @@ -2358,6 +2386,15 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) if (pci_is_pcie(tp->pci_dev) && tp->supports_gmii) pcie_set_readrq(tp->pci_dev, readrq); + + /* Chip doesn't support pause in jumbo mode */ + if (jumbo) { + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, + tp->phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + tp->phydev->advertising); + phy_start_aneg(tp->phydev); + } } DECLARE_RTL_COND(rtl_chipcmd_cond) @@ -4633,8 +4670,6 @@ static int r8169_phy_connect(struct rtl8169_private *tp) if (!tp->supports_gmii) phy_set_max_speed(phydev, SPEED_100); - phy_support_asym_pause(phydev); - phy_attached_info(phydev); return 0; @@ -4660,6 +4695,7 @@ static void rtl8169_down(struct rtl8169_private *tp) static void rtl8169_up(struct rtl8169_private *tp) { pci_set_master(tp->pci_dev); + phy_init_hw(tp->phydev); phy_resume(tp->phydev); rtl8169_init_phy(tp); napi_enable(&tp->napi); @@ -5085,6 +5121,10 @@ static int r8169_mdio_register(struct rtl8169_private *tp) return -EUNATCH; } + tp->phydev->mac_managed_pm = 1; + + phy_support_asym_pause(tp->phydev); + /* PHY will be woken up in rtl_open() */ phy_suspend(tp->phydev); diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index cb47e68c1a3e..86a1eb0634e8 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -993,6 +993,7 @@ struct ravb_private { struct platform_device *pdev; void __iomem *addr; struct clk *clk; + struct clk *refclk; struct mdiobb_ctrl mdiobb; u32 num_rx_ring[NUM_RX_QUEUE]; u32 num_tx_ring[NUM_TX_QUEUE]; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index eb0c03bdb12d..8c84c40ab9a0 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -109,11 +109,13 @@ static void ravb_set_buffer_align(struct sk_buff *skb) * Ethernet AVB device doesn't have ROM for MAC address. * This function gets the MAC address that was used by a bootloader. */ -static void ravb_read_mac_address(struct net_device *ndev, const u8 *mac) +static void ravb_read_mac_address(struct device_node *np, + struct net_device *ndev) { - if (!IS_ERR(mac)) { - ether_addr_copy(ndev->dev_addr, mac); - } else { + int ret; + + ret = of_get_mac_address(np, ndev->dev_addr); + if (ret) { u32 mahr = ravb_read(ndev, MAHR); u32 malr = ravb_read(ndev, MALR); @@ -2148,6 +2150,13 @@ static int ravb_probe(struct platform_device *pdev) goto out_release; } + priv->refclk = devm_clk_get_optional(&pdev->dev, "refclk"); + if (IS_ERR(priv->refclk)) { + error = PTR_ERR(priv->refclk); + goto out_release; + } + clk_prepare_enable(priv->refclk); + ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); ndev->min_mtu = ETH_MIN_MTU; @@ -2200,7 +2209,7 @@ static int ravb_probe(struct platform_device *pdev) priv->msg_enable = RAVB_DEF_MSG_ENABLE; /* Read and set MAC address */ - ravb_read_mac_address(ndev, of_get_mac_address(np)); + ravb_read_mac_address(np, ndev); if (!is_valid_ether_addr(ndev->dev_addr)) { dev_warn(&pdev->dev, "no valid MAC address supplied, using a random one\n"); @@ -2244,6 +2253,7 @@ out_dma_free: if (chip_id != RCAR_GEN2) ravb_ptp_stop(ndev); out_release: + clk_disable_unprepare(priv->refclk); free_netdev(ndev); pm_runtime_put(&pdev->dev); @@ -2260,6 +2270,8 @@ static int ravb_remove(struct platform_device *pdev) if (priv->chip_id != RCAR_GEN2) ravb_ptp_stop(ndev); + clk_disable_unprepare(priv->refclk); + dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); /* Set reset mode */ diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index ebedb1a11132..c5b154868c1f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3170,7 +3170,6 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) struct device_node *np = dev->of_node; struct sh_eth_plat_data *pdata; phy_interface_t interface; - const char *mac_addr; int ret; pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); @@ -3182,9 +3181,7 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) return NULL; pdata->phy_interface = interface; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(pdata->mac_addr, mac_addr); + of_get_mac_address(np, pdata->mac_addr); pdata->no_ether_link = of_property_read_bool(np, "renesas,no-ether-link"); diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 3473d296b2e2..a46633606cae 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2736,7 +2736,7 @@ static void rocker_switchdev_event_work(struct work_struct *work) switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = rocker_world_port_fdb_add(rocker_port, fdb_info); if (err) { @@ -2747,7 +2747,7 @@ static void rocker_switchdev_event_work(struct work_struct *work) break; case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = rocker_world_port_fdb_del(rocker_port, fdb_info); if (err) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c index 33f79402850d..4639ed9438a3 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c @@ -25,8 +25,7 @@ #ifdef CONFIG_OF static int sxgbe_probe_config_dt(struct platform_device *pdev, - struct sxgbe_plat_data *plat, - const char **mac) + struct sxgbe_plat_data *plat) { struct device_node *np = pdev->dev.of_node; struct sxgbe_dma_cfg *dma_cfg; @@ -35,7 +34,6 @@ static int sxgbe_probe_config_dt(struct platform_device *pdev, if (!np) return -ENODEV; - *mac = of_get_mac_address(np); err = of_get_phy_mode(np, &plat->interface); if (err && err != -ENODEV) return err; @@ -63,8 +61,7 @@ static int sxgbe_probe_config_dt(struct platform_device *pdev, } #else static int sxgbe_probe_config_dt(struct platform_device *pdev, - struct sxgbe_plat_data *plat, - const char **mac) + struct sxgbe_plat_data *plat) { return -ENOSYS; } @@ -85,7 +82,6 @@ static int sxgbe_platform_probe(struct platform_device *pdev) void __iomem *addr; struct sxgbe_priv_data *priv = NULL; struct sxgbe_plat_data *plat_dat = NULL; - const char *mac = NULL; struct net_device *ndev = platform_get_drvdata(pdev); struct device_node *node = dev->of_node; @@ -101,7 +97,7 @@ static int sxgbe_platform_probe(struct platform_device *pdev) if (!plat_dat) return -ENOMEM; - ret = sxgbe_probe_config_dt(pdev, plat_dat, &mac); + ret = sxgbe_probe_config_dt(pdev, plat_dat); if (ret) { pr_err("%s: main dt probe failed\n", __func__); return ret; @@ -122,8 +118,7 @@ static int sxgbe_platform_probe(struct platform_device *pdev) } /* Get MAC address if available (DT) */ - if (!IS_ERR_OR_NULL(mac)) - ether_addr_copy(priv->dev->dev_addr, mac); + of_get_mac_address(node, priv->dev->dev_addr); /* Get the TX/RX IRQ numbers */ for (i = 0, chan = 1; i < SXGBE_TX_QUEUES; i++) { diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index da6886dcac37..c873f961d5a5 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1747,6 +1747,22 @@ static size_t efx_ef10_describe_stats(struct efx_nic *efx, u8 *names) mask, names); } +static void efx_ef10_get_fec_stats(struct efx_nic *efx, + struct ethtool_fec_stats *fec_stats) +{ + DECLARE_BITMAP(mask, EF10_STAT_COUNT); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u64 *stats = nic_data->stats; + + efx_ef10_get_stat_mask(efx, mask); + if (test_bit(EF10_STAT_fec_corrected_errors, mask)) + fec_stats->corrected_blocks.total = + stats[EF10_STAT_fec_corrected_errors]; + if (test_bit(EF10_STAT_fec_uncorrected_errors, mask)) + fec_stats->uncorrectable_blocks.total = + stats[EF10_STAT_fec_uncorrected_errors]; +} + static size_t efx_ef10_update_stats_common(struct efx_nic *efx, u64 *full_stats, struct rtnl_link_stats64 *core_stats) { @@ -4122,6 +4138,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .get_wol = efx_ef10_get_wol, .set_wol = efx_ef10_set_wol, .resume_wol = efx_port_dummy_op_void, + .get_fec_stats = efx_ef10_get_fec_stats, .test_chip = efx_ef10_test_chip, .test_nvram = efx_mcdi_nvram_test_all, .mcdi_request = efx_ef10_mcdi_request, diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h index 3332cdf2918a..cd590e0685e5 100644 --- a/drivers/net/ethernet/sfc/enum.h +++ b/drivers/net/ethernet/sfc/enum.h @@ -78,7 +78,6 @@ enum efx_loopback_mode { (1 << LOOPBACK_XAUI) | \ (1 << LOOPBACK_GMII) | \ (1 << LOOPBACK_SGMII) | \ - (1 << LOOPBACK_SGMII) | \ (1 << LOOPBACK_XGBR) | \ (1 << LOOPBACK_XFI) | \ (1 << LOOPBACK_XAUI_FAR) | \ diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 12a91c559aa2..058d9fe41d99 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -206,6 +206,15 @@ static int efx_ethtool_set_wol(struct net_device *net_dev, return efx->type->set_wol(efx, wol->wolopts); } +static void efx_ethtool_get_fec_stats(struct net_device *net_dev, + struct ethtool_fec_stats *fec_stats) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->get_fec_stats) + efx->type->get_fec_stats(efx, fec_stats); +} + static int efx_ethtool_get_ts_info(struct net_device *net_dev, struct ethtool_ts_info *ts_info) { @@ -257,6 +266,7 @@ const struct ethtool_ops efx_ethtool_ops = { .get_module_eeprom = efx_ethtool_get_module_eeprom, .get_link_ksettings = efx_ethtool_get_link_ksettings, .set_link_ksettings = efx_ethtool_set_link_ksettings, + .get_fec_stats = efx_ethtool_get_fec_stats, .get_fecparam = efx_ethtool_get_fecparam, .set_fecparam = efx_ethtool_set_fecparam, }; diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 9f7dfdf708cf..9b4b25704271 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1187,6 +1187,7 @@ struct efx_udp_tunnel { * @get_wol: Get WoL configuration from driver state * @set_wol: Push WoL configuration to the NIC * @resume_wol: Synchronise WoL state between driver and MC (e.g. after resume) + * @get_fec_stats: Get standard FEC statistics. * @test_chip: Test registers. May use efx_farch_test_registers(), and is * expected to reset the NIC. * @test_nvram: Test validity of NVRAM contents @@ -1332,6 +1333,8 @@ struct efx_nic_type { void (*get_wol)(struct efx_nic *efx, struct ethtool_wolinfo *wol); int (*set_wol)(struct efx_nic *efx, u32 type); void (*resume_wol)(struct efx_nic *efx); + void (*get_fec_stats)(struct efx_nic *efx, + struct ethtool_fec_stats *fec_stats); unsigned int (*check_caps)(const struct efx_nic *efx, u8 flag, u32 offset); diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 501b9c7aba56..fcbb4bb31408 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1559,7 +1559,6 @@ static int ave_probe(struct platform_device *pdev) struct ave_private *priv; struct net_device *ndev; struct device_node *np; - const void *mac_addr; void __iomem *base; const char *name; int i, irq, ret; @@ -1600,12 +1599,9 @@ static int ave_probe(struct platform_device *pdev) ndev->max_mtu = AVE_MAX_ETHFRAME - (ETH_HLEN + ETH_FCS_LEN); - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - - /* if the mac address is invalid, use random mac address */ - if (!is_valid_ether_addr(ndev->dev_addr)) { + ret = of_get_mac_address(np, ndev->dev_addr); + if (ret) { + /* if the mac address is invalid, use random mac address */ eth_hw_addr_random(ndev); dev_warn(dev, "Using random MAC address: %pM\n", ndev->dev_addr); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c index 08c76636c164..dfbaea06d108 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c @@ -115,7 +115,7 @@ static int anarion_dwmac_probe(struct platform_device *pdev) if (IS_ERR(gmac)) return PTR_ERR(gmac); - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 27254b27d7ed..bc91fd867dcd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -438,7 +438,7 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) if (IS_ERR(stmmac_res.addr)) return PTR_ERR(stmmac_res.addr); - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index fad503820e04..fbfda55b4c52 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -27,7 +27,7 @@ static int dwmac_generic_probe(struct platform_device *pdev) return ret; if (pdev->dev.of_node) { - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(&pdev->dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index c1a361305a5a..84651207a1de 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -231,7 +231,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) if (!dwmac) return -ENOMEM; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index 6c19fcc76c6f..06d287f104be 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -85,7 +85,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(&pdev->dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 60566598d644..ec140fc4a0f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -296,6 +296,13 @@ static int intel_crosststamp(ktime_t *device, intel_priv = priv->plat->bsp_priv; + /* Both internal crosstimestamping and external triggered event + * timestamping cannot be run concurrently. + */ + if (priv->plat->ext_snapshot_en) + return -EBUSY; + + mutex_lock(&priv->aux_ts_lock); /* Enable Internal snapshot trigger */ acr_value = readl(ptpaddr + PTP_ACR); acr_value &= ~PTP_ACR_MASK; @@ -321,6 +328,8 @@ static int intel_crosststamp(ktime_t *device, acr_value = readl(ptpaddr + PTP_ACR); acr_value |= PTP_ACR_ATSFC; writel(acr_value, ptpaddr + PTP_ACR); + /* Release the mutex */ + mutex_unlock(&priv->aux_ts_lock); /* Trigger Internal snapshot signal * Create a rising edge by just toggle the GPO1 to low @@ -520,6 +529,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->mdio_bus_data->phy_mask |= 1 << INTEL_MGBE_XPCS_ADDR; plat->int_snapshot_num = AUX_SNAPSHOT1; + plat->ext_snapshot_num = AUX_SNAPSHOT0; plat->has_crossts = true; plat->crosststamp = intel_crosststamp; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 749585fe6fc9..28dd0ed85a82 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -255,7 +255,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) if (val) return val; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index 3d3f43d91b98..9d77c647badd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -37,7 +37,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 9e4b83832938..58c0feaa8131 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -407,7 +407,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c index bbc16b5a410a..16fb66a0ca72 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c @@ -52,7 +52,7 @@ static int meson6_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 848e5c37746b..c7a6588d9398 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -398,7 +398,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index 8551ea878ba5..adfeb8d3293d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -118,7 +118,7 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index a674b7d6b49a..84382fc5cc4d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -461,7 +461,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(&pdev->dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6ef30252bfe0..8d28a536e1bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1396,7 +1396,7 @@ static int rk_gmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 70d41783329d..85208128f135 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -398,7 +398,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index e1b63df6f96f..710d7435733e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -325,7 +325,7 @@ static int sti_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 5d4df4c5254e..2b38a499a404 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -371,7 +371,7 @@ static int stm32_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 19e7ec30af4c..4422baeed3d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1221,7 +1221,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (ret) return -EINVAL; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 0e1ca2cba3c7..527077c98ebc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -108,7 +108,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index d23be45a64e5..d046e33b8a29 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -208,7 +208,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 2b5022ef1e52..2cc91759b91f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -504,6 +504,8 @@ struct stmmac_ops { #define stmmac_fpe_irq_status(__priv, __args...) \ stmmac_do_callback(__priv, mac, fpe_irq_status, __args) +struct stmmac_priv; + /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data); @@ -515,6 +517,7 @@ struct stmmac_hwtimestamp { int add_sub, int gmac4); void (*get_systime) (void __iomem *ioaddr, u64 *systime); void (*get_ptptime)(void __iomem *ioaddr, u64 *ptp_time); + void (*timestamp_interrupt)(struct stmmac_priv *priv); }; #define stmmac_config_hw_tstamping(__priv, __args...) \ @@ -531,6 +534,8 @@ struct stmmac_hwtimestamp { stmmac_do_void_callback(__priv, ptp, get_systime, __args) #define stmmac_get_ptptime(__priv, __args...) \ stmmac_do_void_callback(__priv, ptp, get_ptptime, __args) +#define stmmac_timestamp_interrupt(__priv, __args...) \ + stmmac_do_void_callback(__priv, ptp, timestamp_interrupt, __args) /* Helpers to manage the descriptors for chain and ring modes */ struct stmmac_mode_ops { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index c49debb62b05..b6cd43eda7ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -26,7 +26,7 @@ struct stmmac_resources { void __iomem *addr; - const char *mac; + u8 mac[ETH_ALEN]; int wol_irq; int lpi_irq; int irq; @@ -40,6 +40,7 @@ enum stmmac_txbuf_type { STMMAC_TXBUF_T_SKB, STMMAC_TXBUF_T_XDP_TX, STMMAC_TXBUF_T_XDP_NDO, + STMMAC_TXBUF_T_XSK_TX, }; struct stmmac_tx_info { @@ -69,6 +70,8 @@ struct stmmac_tx_queue { struct xdp_frame **xdpf; }; struct stmmac_tx_info *tx_skbuff_dma; + struct xsk_buff_pool *xsk_pool; + u32 xsk_frames_done; unsigned int cur_tx; unsigned int dirty_tx; dma_addr_t dma_tx_phy; @@ -77,9 +80,14 @@ struct stmmac_tx_queue { }; struct stmmac_rx_buffer { - struct page *page; - dma_addr_t addr; - __u32 page_offset; + union { + struct { + struct page *page; + dma_addr_t addr; + __u32 page_offset; + }; + struct xdp_buff *xdp; + }; struct page *sec_page; dma_addr_t sec_addr; }; @@ -88,6 +96,7 @@ struct stmmac_rx_queue { u32 rx_count_frames; u32 queue_index; struct xdp_rxq_info xdp_rxq; + struct xsk_buff_pool *xsk_pool; struct page_pool *page_pool; struct stmmac_rx_buffer *buf_pool; struct stmmac_priv *priv_data; @@ -95,6 +104,7 @@ struct stmmac_rx_queue { struct dma_desc *dma_rx ____cacheline_aligned_in_smp; unsigned int cur_rx; unsigned int dirty_rx; + unsigned int buf_alloc_num; u32 rx_zeroc_thresh; dma_addr_t dma_rx_phy; u32 rx_tail_addr; @@ -109,6 +119,7 @@ struct stmmac_rx_queue { struct stmmac_channel { struct napi_struct rx_napi ____cacheline_aligned_in_smp; struct napi_struct tx_napi ____cacheline_aligned_in_smp; + struct napi_struct rxtx_napi ____cacheline_aligned_in_smp; struct stmmac_priv *priv_data; spinlock_t lock; u32 index; @@ -239,6 +250,9 @@ struct stmmac_priv { int use_riwt; int irq_wake; spinlock_t ptp_lock; + /* Protects auxiliary snapshot registers from concurrent access. */ + struct mutex aux_ts_lock; + void __iomem *mmcaddr; void __iomem *ptpaddr; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -283,6 +297,7 @@ struct stmmac_priv { struct stmmac_rss rss; /* XDP BPF Program */ + unsigned long *af_xdp_zc_qps; struct bpf_prog *xdp_prog; }; @@ -328,6 +343,12 @@ static inline unsigned int stmmac_rx_offset(struct stmmac_priv *priv) return 0; } +void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue); +void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue); +void stmmac_disable_tx_queue(struct stmmac_priv *priv, u32 queue); +void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue); +int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags); + #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS) void stmmac_selftest_run(struct net_device *dev, struct ethtool_test *etest, u64 *buf); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 113c51bcc0b5..074e2cdfb0fa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -12,8 +12,11 @@ #include <linux/io.h> #include <linux/iopoll.h> #include <linux/delay.h> +#include <linux/ptp_clock_kernel.h> #include "common.h" #include "stmmac_ptp.h" +#include "dwmac4.h" +#include "stmmac.h" static void config_hw_tstamping(void __iomem *ioaddr, u32 data) { @@ -163,6 +166,41 @@ static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) *ptp_time = ns; } +static void timestamp_interrupt(struct stmmac_priv *priv) +{ + u32 num_snapshot, ts_status, tsync_int; + struct ptp_clock_event event; + unsigned long flags; + u64 ptp_time; + int i; + + tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE; + + if (!tsync_int) + return; + + /* Read timestamp status to clear interrupt from either external + * timestamp or start/end of PPS. + */ + ts_status = readl(priv->ioaddr + GMAC_TIMESTAMP_STATUS); + + if (!priv->plat->ext_snapshot_en) + return; + + num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >> + GMAC_TIMESTAMP_ATSNS_SHIFT; + + for (i = 0; i < num_snapshot; i++) { + spin_lock_irqsave(&priv->ptp_lock, flags); + get_ptptime(priv->ptpaddr, &ptp_time); + spin_unlock_irqrestore(&priv->ptp_lock, flags); + event.type = PTP_CLOCK_EXTTS; + event.index = 0; + event.timestamp = ptp_time; + ptp_clock_event(priv->ptp_clock, &event); + } +} + const struct stmmac_hwtimestamp stmmac_ptp = { .config_hw_tstamping = config_hw_tstamping, .init_systime = init_systime, @@ -171,4 +209,5 @@ const struct stmmac_hwtimestamp stmmac_ptp = { .adjust_systime = adjust_systime, .get_systime = get_systime, .get_ptptime = get_ptptime, + .timestamp_interrupt = timestamp_interrupt, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 77285646c5fc..d1ca07c846e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -40,6 +40,7 @@ #include <linux/udp.h> #include <linux/bpf_trace.h> #include <net/pkt_cls.h> +#include <net/xdp_sock_drv.h> #include "stmmac_ptp.h" #include "stmmac.h" #include "stmmac_xdp.h" @@ -69,6 +70,11 @@ MODULE_PARM_DESC(phyaddr, "Physical device address"); #define STMMAC_TX_THRESH(x) ((x)->dma_tx_size / 4) #define STMMAC_RX_THRESH(x) ((x)->dma_rx_size / 4) +/* Limit to make sure XDP TX and slow path can coexist */ +#define STMMAC_XSK_TX_BUDGET_MAX 256 +#define STMMAC_TX_XSK_AVAIL 16 +#define STMMAC_RX_FILL_BATCH 16 + #define STMMAC_XDP_PASS 0 #define STMMAC_XDP_CONSUMED BIT(0) #define STMMAC_XDP_TX BIT(1) @@ -117,6 +123,8 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id); static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id); static irqreturn_t stmmac_msi_intr_tx(int irq, void *data); static irqreturn_t stmmac_msi_intr_rx(int irq, void *data); +static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue); +static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue); #ifdef CONFIG_DEBUG_FS static const struct net_device_ops stmmac_netdev_ops; @@ -179,11 +187,7 @@ static void stmmac_verify_args(void) eee_timer = STMMAC_DEFAULT_LPI_TIMER; } -/** - * stmmac_disable_all_queues - Disable all queues - * @priv: driver private structure - */ -static void stmmac_disable_all_queues(struct stmmac_priv *priv) +static void __stmmac_disable_all_queues(struct stmmac_priv *priv) { u32 rx_queues_cnt = priv->plat->rx_queues_to_use; u32 tx_queues_cnt = priv->plat->tx_queues_to_use; @@ -193,6 +197,12 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv) for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; + if (stmmac_xdp_is_enabled(priv) && + test_bit(queue, priv->af_xdp_zc_qps)) { + napi_disable(&ch->rxtx_napi); + continue; + } + if (queue < rx_queues_cnt) napi_disable(&ch->rx_napi); if (queue < tx_queues_cnt) @@ -201,6 +211,28 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv) } /** + * stmmac_disable_all_queues - Disable all queues + * @priv: driver private structure + */ +static void stmmac_disable_all_queues(struct stmmac_priv *priv) +{ + u32 rx_queues_cnt = priv->plat->rx_queues_to_use; + struct stmmac_rx_queue *rx_q; + u32 queue; + + /* synchronize_rcu() needed for pending XDP buffers to drain */ + for (queue = 0; queue < rx_queues_cnt; queue++) { + rx_q = &priv->rx_queue[queue]; + if (rx_q->xsk_pool) { + synchronize_rcu(); + break; + } + } + + __stmmac_disable_all_queues(priv); +} + +/** * stmmac_enable_all_queues - Enable all queues * @priv: driver private structure */ @@ -214,6 +246,12 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv) for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; + if (stmmac_xdp_is_enabled(priv) && + test_bit(queue, priv->af_xdp_zc_qps)) { + napi_enable(&ch->rxtx_napi); + continue; + } + if (queue < rx_queues_cnt) napi_enable(&ch->rx_napi); if (queue < tx_queues_cnt) @@ -1388,12 +1426,14 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->page) - return -ENOMEM; - buf->page_offset = stmmac_rx_offset(priv); + if (!buf->page) { + buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + if (!buf->page) + return -ENOMEM; + buf->page_offset = stmmac_rx_offset(priv); + } - if (priv->sph) { + if (priv->sph && !buf->sec_page) { buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); if (!buf->sec_page) return -ENOMEM; @@ -1465,6 +1505,9 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i) tx_q->xdpf[i] = NULL; } + if (tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XSK_TX) + tx_q->xsk_frames_done++; + if (tx_q->tx_skbuff[i] && tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_SKB) { dev_kfree_skb_any(tx_q->tx_skbuff[i]); @@ -1476,167 +1519,206 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i) } /** - * stmmac_reinit_rx_buffers - reinit the RX descriptor buffer. - * @priv: driver private structure - * Description: this function is called to re-allocate a receive buffer, perform - * the DMA mapping and init the descriptor. + * dma_free_rx_skbufs - free RX dma buffers + * @priv: private structure + * @queue: RX queue index */ -static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv) +static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue) { - u32 rx_count = priv->plat->rx_queues_to_use; - u32 queue; int i; - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + for (i = 0; i < priv->dma_rx_size; i++) + stmmac_free_rx_buffer(priv, queue, i); +} - for (i = 0; i < priv->dma_rx_size; i++) { - struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; +static int stmmac_alloc_rx_buffers(struct stmmac_priv *priv, u32 queue, + gfp_t flags) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + int i; - if (buf->page) { - page_pool_recycle_direct(rx_q->page_pool, buf->page); - buf->page = NULL; - } + for (i = 0; i < priv->dma_rx_size; i++) { + struct dma_desc *p; + int ret; - if (priv->sph && buf->sec_page) { - page_pool_recycle_direct(rx_q->page_pool, buf->sec_page); - buf->sec_page = NULL; - } - } + if (priv->extend_desc) + p = &((rx_q->dma_erx + i)->basic); + else + p = rx_q->dma_rx + i; + + ret = stmmac_init_rx_buffers(priv, p, i, flags, + queue); + if (ret) + return ret; + + rx_q->buf_alloc_num++; } - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + return 0; +} - for (i = 0; i < priv->dma_rx_size; i++) { - struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - struct dma_desc *p; +/** + * dma_free_rx_xskbufs - free RX dma buffers from XSK pool + * @priv: private structure + * @queue: RX queue index + */ +static void dma_free_rx_xskbufs(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + int i; - if (priv->extend_desc) - p = &((rx_q->dma_erx + i)->basic); - else - p = rx_q->dma_rx + i; + for (i = 0; i < priv->dma_rx_size; i++) { + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->page) - goto err_reinit_rx_buffers; + if (!buf->xdp) + continue; - buf->addr = page_pool_get_dma_addr(buf->page) + - buf->page_offset; - } + xsk_buff_free(buf->xdp); + buf->xdp = NULL; + } +} - if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->sec_page) - goto err_reinit_rx_buffers; +static int stmmac_alloc_rx_buffers_zc(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + int i; - buf->sec_addr = page_pool_get_dma_addr(buf->sec_page); - } + for (i = 0; i < priv->dma_rx_size; i++) { + struct stmmac_rx_buffer *buf; + dma_addr_t dma_addr; + struct dma_desc *p; - stmmac_set_desc_addr(priv, p, buf->addr); - if (priv->sph) - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true); - else - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); - if (priv->dma_buf_sz == BUF_SIZE_16KiB) - stmmac_init_desc3(priv, p); - } - } + if (priv->extend_desc) + p = (struct dma_desc *)(rx_q->dma_erx + i); + else + p = rx_q->dma_rx + i; - return; + buf = &rx_q->buf_pool[i]; -err_reinit_rx_buffers: - do { - while (--i >= 0) - stmmac_free_rx_buffer(priv, queue, i); + buf->xdp = xsk_buff_alloc(rx_q->xsk_pool); + if (!buf->xdp) + return -ENOMEM; - if (queue == 0) - break; + dma_addr = xsk_buff_xdp_get_dma(buf->xdp); + stmmac_set_desc_addr(priv, p, dma_addr); + rx_q->buf_alloc_num++; + } - i = priv->dma_rx_size; - } while (queue-- > 0); + return 0; +} + +static struct xsk_buff_pool *stmmac_get_xsk_pool(struct stmmac_priv *priv, u32 queue) +{ + if (!stmmac_xdp_is_enabled(priv) || !test_bit(queue, priv->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(priv->dev, queue); } /** - * init_dma_rx_desc_rings - init the RX descriptor rings - * @dev: net device structure + * __init_dma_rx_desc_rings - init the RX descriptor ring (per queue) + * @priv: driver private structure + * @queue: RX queue index * @flags: gfp flag. * Description: this function initializes the DMA RX descriptors * and allocates the socket buffers. It supports the chained and ring * modes. */ -static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) +static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, u32 queue, gfp_t flags) { - struct stmmac_priv *priv = netdev_priv(dev); - u32 rx_count = priv->plat->rx_queues_to_use; - int ret = -ENOMEM; - int queue; - int i; + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + int ret; - /* RX INITIALIZATION */ netif_dbg(priv, probe, priv->dev, - "SKB addresses:\nskb\t\tskb data\tdma data\n"); + "(%s) dma_rx_phy=0x%08x\n", __func__, + (u32)rx_q->dma_rx_phy); - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - int ret; + stmmac_clear_rx_descriptors(priv, queue); - netif_dbg(priv, probe, priv->dev, - "(%s) dma_rx_phy=0x%08x\n", __func__, - (u32)rx_q->dma_rx_phy); + xdp_rxq_info_unreg_mem_model(&rx_q->xdp_rxq); - stmmac_clear_rx_descriptors(priv, queue); + rx_q->xsk_pool = stmmac_get_xsk_pool(priv, queue); + if (rx_q->xsk_pool) { + WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL)); + netdev_info(priv->dev, + "Register MEM_TYPE_XSK_BUFF_POOL RxQ-%d\n", + rx_q->queue_index); + xsk_pool_set_rxq_info(rx_q->xsk_pool, &rx_q->xdp_rxq); + } else { WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq, MEM_TYPE_PAGE_POOL, rx_q->page_pool)); - netdev_info(priv->dev, "Register MEM_TYPE_PAGE_POOL RxQ-%d\n", rx_q->queue_index); + } - for (i = 0; i < priv->dma_rx_size; i++) { - struct dma_desc *p; + if (rx_q->xsk_pool) { + /* RX XDP ZC buffer pool may not be populated, e.g. + * xdpsock TX-only. + */ + stmmac_alloc_rx_buffers_zc(priv, queue); + } else { + ret = stmmac_alloc_rx_buffers(priv, queue, flags); + if (ret < 0) + return -ENOMEM; + } - if (priv->extend_desc) - p = &((rx_q->dma_erx + i)->basic); - else - p = rx_q->dma_rx + i; + rx_q->cur_rx = 0; + rx_q->dirty_rx = 0; - ret = stmmac_init_rx_buffers(priv, p, i, flags, - queue); - if (ret) - goto err_init_rx_buffers; - } + /* Setup the chained descriptor addresses */ + if (priv->mode == STMMAC_CHAIN_MODE) { + if (priv->extend_desc) + stmmac_mode_init(priv, rx_q->dma_erx, + rx_q->dma_rx_phy, + priv->dma_rx_size, 1); + else + stmmac_mode_init(priv, rx_q->dma_rx, + rx_q->dma_rx_phy, + priv->dma_rx_size, 0); + } - rx_q->cur_rx = 0; - rx_q->dirty_rx = (unsigned int)(i - priv->dma_rx_size); - - /* Setup the chained descriptor addresses */ - if (priv->mode == STMMAC_CHAIN_MODE) { - if (priv->extend_desc) - stmmac_mode_init(priv, rx_q->dma_erx, - rx_q->dma_rx_phy, - priv->dma_rx_size, 1); - else - stmmac_mode_init(priv, rx_q->dma_rx, - rx_q->dma_rx_phy, - priv->dma_rx_size, 0); - } + return 0; +} + +static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) +{ + struct stmmac_priv *priv = netdev_priv(dev); + u32 rx_count = priv->plat->rx_queues_to_use; + u32 queue; + int ret; + + /* RX INITIALIZATION */ + netif_dbg(priv, probe, priv->dev, + "SKB addresses:\nskb\t\tskb data\tdma data\n"); + + for (queue = 0; queue < rx_count; queue++) { + ret = __init_dma_rx_desc_rings(priv, queue, flags); + if (ret) + goto err_init_rx_buffers; } return 0; err_init_rx_buffers: while (queue >= 0) { - while (--i >= 0) - stmmac_free_rx_buffer(priv, queue, i); + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + + if (rx_q->xsk_pool) + dma_free_rx_xskbufs(priv, queue); + else + dma_free_rx_skbufs(priv, queue); + + rx_q->buf_alloc_num = 0; + rx_q->xsk_pool = NULL; if (queue == 0) break; - i = priv->dma_rx_size; queue--; } @@ -1644,63 +1726,75 @@ err_init_rx_buffers: } /** - * init_dma_tx_desc_rings - init the TX descriptor rings - * @dev: net device structure. + * __init_dma_tx_desc_rings - init the TX descriptor ring (per queue) + * @priv: driver private structure + * @queue : TX queue index * Description: this function initializes the DMA TX descriptors * and allocates the socket buffers. It supports the chained and ring * modes. */ -static int init_dma_tx_desc_rings(struct net_device *dev) +static int __init_dma_tx_desc_rings(struct stmmac_priv *priv, u32 queue) { - struct stmmac_priv *priv = netdev_priv(dev); - u32 tx_queue_cnt = priv->plat->tx_queues_to_use; - u32 queue; + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; int i; - for (queue = 0; queue < tx_queue_cnt; queue++) { - struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + netif_dbg(priv, probe, priv->dev, + "(%s) dma_tx_phy=0x%08x\n", __func__, + (u32)tx_q->dma_tx_phy); - netif_dbg(priv, probe, priv->dev, - "(%s) dma_tx_phy=0x%08x\n", __func__, - (u32)tx_q->dma_tx_phy); - - /* Setup the chained descriptor addresses */ - if (priv->mode == STMMAC_CHAIN_MODE) { - if (priv->extend_desc) - stmmac_mode_init(priv, tx_q->dma_etx, - tx_q->dma_tx_phy, - priv->dma_tx_size, 1); - else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) - stmmac_mode_init(priv, tx_q->dma_tx, - tx_q->dma_tx_phy, - priv->dma_tx_size, 0); - } + /* Setup the chained descriptor addresses */ + if (priv->mode == STMMAC_CHAIN_MODE) { + if (priv->extend_desc) + stmmac_mode_init(priv, tx_q->dma_etx, + tx_q->dma_tx_phy, + priv->dma_tx_size, 1); + else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) + stmmac_mode_init(priv, tx_q->dma_tx, + tx_q->dma_tx_phy, + priv->dma_tx_size, 0); + } - for (i = 0; i < priv->dma_tx_size; i++) { - struct dma_desc *p; - if (priv->extend_desc) - p = &((tx_q->dma_etx + i)->basic); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - p = &((tx_q->dma_entx + i)->basic); - else - p = tx_q->dma_tx + i; + tx_q->xsk_pool = stmmac_get_xsk_pool(priv, queue); - stmmac_clear_desc(priv, p); + for (i = 0; i < priv->dma_tx_size; i++) { + struct dma_desc *p; - tx_q->tx_skbuff_dma[i].buf = 0; - tx_q->tx_skbuff_dma[i].map_as_page = false; - tx_q->tx_skbuff_dma[i].len = 0; - tx_q->tx_skbuff_dma[i].last_segment = false; - tx_q->tx_skbuff[i] = NULL; - } + if (priv->extend_desc) + p = &((tx_q->dma_etx + i)->basic); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &((tx_q->dma_entx + i)->basic); + else + p = tx_q->dma_tx + i; - tx_q->dirty_tx = 0; - tx_q->cur_tx = 0; - tx_q->mss = 0; + stmmac_clear_desc(priv, p); - netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue)); + tx_q->tx_skbuff_dma[i].buf = 0; + tx_q->tx_skbuff_dma[i].map_as_page = false; + tx_q->tx_skbuff_dma[i].len = 0; + tx_q->tx_skbuff_dma[i].last_segment = false; + tx_q->tx_skbuff[i] = NULL; } + tx_q->dirty_tx = 0; + tx_q->cur_tx = 0; + tx_q->mss = 0; + + netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue)); + + return 0; +} + +static int init_dma_tx_desc_rings(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + u32 tx_queue_cnt; + u32 queue; + + tx_queue_cnt = priv->plat->tx_queues_to_use; + + for (queue = 0; queue < tx_queue_cnt; queue++) + __init_dma_tx_desc_rings(priv, queue); + return 0; } @@ -1732,29 +1826,25 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags) } /** - * dma_free_rx_skbufs - free RX dma buffers - * @priv: private structure - * @queue: RX queue index - */ -static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue) -{ - int i; - - for (i = 0; i < priv->dma_rx_size; i++) - stmmac_free_rx_buffer(priv, queue, i); -} - -/** * dma_free_tx_skbufs - free TX dma buffers * @priv: private structure * @queue: TX queue index */ static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; int i; + tx_q->xsk_frames_done = 0; + for (i = 0; i < priv->dma_tx_size; i++) stmmac_free_tx_buffer(priv, queue, i); + + if (tx_q->xsk_pool && tx_q->xsk_frames_done) { + xsk_tx_completed(tx_q->xsk_pool, tx_q->xsk_frames_done); + tx_q->xsk_frames_done = 0; + tx_q->xsk_pool = NULL; + } } /** @@ -1771,153 +1861,186 @@ static void stmmac_free_tx_skbufs(struct stmmac_priv *priv) } /** - * free_dma_rx_desc_resources - free RX dma desc resources + * __free_dma_rx_desc_resources - free RX dma desc resources (per queue) * @priv: private structure + * @queue: RX queue index */ -static void free_dma_rx_desc_resources(struct stmmac_priv *priv) +static void __free_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue) { - u32 rx_count = priv->plat->rx_queues_to_use; - u32 queue; - - /* Free RX queue resources */ - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - /* Release the DMA RX socket buffers */ + /* Release the DMA RX socket buffers */ + if (rx_q->xsk_pool) + dma_free_rx_xskbufs(priv, queue); + else dma_free_rx_skbufs(priv, queue); - /* Free DMA regions of consistent memory previously allocated */ - if (!priv->extend_desc) - dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_desc), - rx_q->dma_rx, rx_q->dma_rx_phy); - else - dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_extended_desc), - rx_q->dma_erx, rx_q->dma_rx_phy); + rx_q->buf_alloc_num = 0; + rx_q->xsk_pool = NULL; - if (xdp_rxq_info_is_reg(&rx_q->xdp_rxq)) - xdp_rxq_info_unreg(&rx_q->xdp_rxq); + /* Free DMA regions of consistent memory previously allocated */ + if (!priv->extend_desc) + dma_free_coherent(priv->device, priv->dma_rx_size * + sizeof(struct dma_desc), + rx_q->dma_rx, rx_q->dma_rx_phy); + else + dma_free_coherent(priv->device, priv->dma_rx_size * + sizeof(struct dma_extended_desc), + rx_q->dma_erx, rx_q->dma_rx_phy); - kfree(rx_q->buf_pool); - if (rx_q->page_pool) - page_pool_destroy(rx_q->page_pool); - } + if (xdp_rxq_info_is_reg(&rx_q->xdp_rxq)) + xdp_rxq_info_unreg(&rx_q->xdp_rxq); + + kfree(rx_q->buf_pool); + if (rx_q->page_pool) + page_pool_destroy(rx_q->page_pool); +} + +static void free_dma_rx_desc_resources(struct stmmac_priv *priv) +{ + u32 rx_count = priv->plat->rx_queues_to_use; + u32 queue; + + /* Free RX queue resources */ + for (queue = 0; queue < rx_count; queue++) + __free_dma_rx_desc_resources(priv, queue); } /** - * free_dma_tx_desc_resources - free TX dma desc resources + * __free_dma_tx_desc_resources - free TX dma desc resources (per queue) * @priv: private structure + * @queue: TX queue index */ -static void free_dma_tx_desc_resources(struct stmmac_priv *priv) +static void __free_dma_tx_desc_resources(struct stmmac_priv *priv, u32 queue) { - u32 tx_count = priv->plat->tx_queues_to_use; - u32 queue; + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; - /* Free TX queue resources */ - for (queue = 0; queue < tx_count; queue++) { - struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; - size_t size; - void *addr; + /* Release the DMA TX socket buffers */ + dma_free_tx_skbufs(priv, queue); + + if (priv->extend_desc) { + size = sizeof(struct dma_extended_desc); + addr = tx_q->dma_etx; + } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { + size = sizeof(struct dma_edesc); + addr = tx_q->dma_entx; + } else { + size = sizeof(struct dma_desc); + addr = tx_q->dma_tx; + } - /* Release the DMA TX socket buffers */ - dma_free_tx_skbufs(priv, queue); + size *= priv->dma_tx_size; - if (priv->extend_desc) { - size = sizeof(struct dma_extended_desc); - addr = tx_q->dma_etx; - } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { - size = sizeof(struct dma_edesc); - addr = tx_q->dma_entx; - } else { - size = sizeof(struct dma_desc); - addr = tx_q->dma_tx; - } + dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy); - size *= priv->dma_tx_size; + kfree(tx_q->tx_skbuff_dma); + kfree(tx_q->tx_skbuff); +} - dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy); +static void free_dma_tx_desc_resources(struct stmmac_priv *priv) +{ + u32 tx_count = priv->plat->tx_queues_to_use; + u32 queue; - kfree(tx_q->tx_skbuff_dma); - kfree(tx_q->tx_skbuff); - } + /* Free TX queue resources */ + for (queue = 0; queue < tx_count; queue++) + __free_dma_tx_desc_resources(priv, queue); } /** - * alloc_dma_rx_desc_resources - alloc RX resources. + * __alloc_dma_rx_desc_resources - alloc RX resources (per queue). * @priv: private structure + * @queue: RX queue index * Description: according to which descriptor can be used (extend or basic) * this function allocates the resources for TX and RX paths. In case of * reception, for example, it pre-allocated the RX socket buffer in order to * allow zero-copy mechanism. */ -static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv) +static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct stmmac_channel *ch = &priv->channel[queue]; bool xdp_prog = stmmac_xdp_is_enabled(priv); - u32 rx_count = priv->plat->rx_queues_to_use; - int ret = -ENOMEM; - u32 queue; + struct page_pool_params pp_params = { 0 }; + unsigned int num_pages; + unsigned int napi_id; + int ret; - /* RX queues buffers and DMA */ - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - struct stmmac_channel *ch = &priv->channel[queue]; - struct page_pool_params pp_params = { 0 }; - unsigned int num_pages; - int ret; + rx_q->queue_index = queue; + rx_q->priv_data = priv; + + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = priv->dma_rx_size; + num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE); + pp_params.order = ilog2(num_pages); + pp_params.nid = dev_to_node(priv->device); + pp_params.dev = priv->device; + pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + pp_params.offset = stmmac_rx_offset(priv); + pp_params.max_len = STMMAC_MAX_RX_BUF_SIZE(num_pages); + + rx_q->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rx_q->page_pool)) { + ret = PTR_ERR(rx_q->page_pool); + rx_q->page_pool = NULL; + return ret; + } - rx_q->queue_index = queue; - rx_q->priv_data = priv; - - pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; - pp_params.pool_size = priv->dma_rx_size; - num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE); - pp_params.order = ilog2(num_pages); - pp_params.nid = dev_to_node(priv->device); - pp_params.dev = priv->device; - pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - pp_params.offset = stmmac_rx_offset(priv); - pp_params.max_len = STMMAC_MAX_RX_BUF_SIZE(num_pages); - - rx_q->page_pool = page_pool_create(&pp_params); - if (IS_ERR(rx_q->page_pool)) { - ret = PTR_ERR(rx_q->page_pool); - rx_q->page_pool = NULL; - goto err_dma; - } + rx_q->buf_pool = kcalloc(priv->dma_rx_size, + sizeof(*rx_q->buf_pool), + GFP_KERNEL); + if (!rx_q->buf_pool) + return -ENOMEM; - rx_q->buf_pool = kcalloc(priv->dma_rx_size, - sizeof(*rx_q->buf_pool), - GFP_KERNEL); - if (!rx_q->buf_pool) - goto err_dma; + if (priv->extend_desc) { + rx_q->dma_erx = dma_alloc_coherent(priv->device, + priv->dma_rx_size * + sizeof(struct dma_extended_desc), + &rx_q->dma_rx_phy, + GFP_KERNEL); + if (!rx_q->dma_erx) + return -ENOMEM; - if (priv->extend_desc) { - rx_q->dma_erx = dma_alloc_coherent(priv->device, - priv->dma_rx_size * - sizeof(struct dma_extended_desc), - &rx_q->dma_rx_phy, - GFP_KERNEL); - if (!rx_q->dma_erx) - goto err_dma; + } else { + rx_q->dma_rx = dma_alloc_coherent(priv->device, + priv->dma_rx_size * + sizeof(struct dma_desc), + &rx_q->dma_rx_phy, + GFP_KERNEL); + if (!rx_q->dma_rx) + return -ENOMEM; + } - } else { - rx_q->dma_rx = dma_alloc_coherent(priv->device, - priv->dma_rx_size * - sizeof(struct dma_desc), - &rx_q->dma_rx_phy, - GFP_KERNEL); - if (!rx_q->dma_rx) - goto err_dma; - } + if (stmmac_xdp_is_enabled(priv) && + test_bit(queue, priv->af_xdp_zc_qps)) + napi_id = ch->rxtx_napi.napi_id; + else + napi_id = ch->rx_napi.napi_id; - ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev, - rx_q->queue_index, - ch->rx_napi.napi_id); - if (ret) { - netdev_err(priv->dev, "Failed to register xdp rxq info\n"); + ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev, + rx_q->queue_index, + napi_id); + if (ret) { + netdev_err(priv->dev, "Failed to register xdp rxq info\n"); + return -EINVAL; + } + + return 0; +} + +static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv) +{ + u32 rx_count = priv->plat->rx_queues_to_use; + u32 queue; + int ret; + + /* RX queues buffers and DMA */ + for (queue = 0; queue < rx_count; queue++) { + ret = __alloc_dma_rx_desc_resources(priv, queue); + if (ret) goto err_dma; - } } return 0; @@ -1929,60 +2052,70 @@ err_dma: } /** - * alloc_dma_tx_desc_resources - alloc TX resources. + * __alloc_dma_tx_desc_resources - alloc TX resources (per queue). * @priv: private structure + * @queue: TX queue index * Description: according to which descriptor can be used (extend or basic) * this function allocates the resources for TX and RX paths. In case of * reception, for example, it pre-allocated the RX socket buffer in order to * allow zero-copy mechanism. */ -static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) +static int __alloc_dma_tx_desc_resources(struct stmmac_priv *priv, u32 queue) { - u32 tx_count = priv->plat->tx_queues_to_use; - int ret = -ENOMEM; - u32 queue; + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; - /* TX queues buffers and DMA */ - for (queue = 0; queue < tx_count; queue++) { - struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; - size_t size; - void *addr; + tx_q->queue_index = queue; + tx_q->priv_data = priv; - tx_q->queue_index = queue; - tx_q->priv_data = priv; + tx_q->tx_skbuff_dma = kcalloc(priv->dma_tx_size, + sizeof(*tx_q->tx_skbuff_dma), + GFP_KERNEL); + if (!tx_q->tx_skbuff_dma) + return -ENOMEM; - tx_q->tx_skbuff_dma = kcalloc(priv->dma_tx_size, - sizeof(*tx_q->tx_skbuff_dma), - GFP_KERNEL); - if (!tx_q->tx_skbuff_dma) - goto err_dma; + tx_q->tx_skbuff = kcalloc(priv->dma_tx_size, + sizeof(struct sk_buff *), + GFP_KERNEL); + if (!tx_q->tx_skbuff) + return -ENOMEM; - tx_q->tx_skbuff = kcalloc(priv->dma_tx_size, - sizeof(struct sk_buff *), - GFP_KERNEL); - if (!tx_q->tx_skbuff) - goto err_dma; + if (priv->extend_desc) + size = sizeof(struct dma_extended_desc); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + size = sizeof(struct dma_edesc); + else + size = sizeof(struct dma_desc); - if (priv->extend_desc) - size = sizeof(struct dma_extended_desc); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - size = sizeof(struct dma_edesc); - else - size = sizeof(struct dma_desc); + size *= priv->dma_tx_size; - size *= priv->dma_tx_size; + addr = dma_alloc_coherent(priv->device, size, + &tx_q->dma_tx_phy, GFP_KERNEL); + if (!addr) + return -ENOMEM; - addr = dma_alloc_coherent(priv->device, size, - &tx_q->dma_tx_phy, GFP_KERNEL); - if (!addr) - goto err_dma; + if (priv->extend_desc) + tx_q->dma_etx = addr; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + tx_q->dma_entx = addr; + else + tx_q->dma_tx = addr; - if (priv->extend_desc) - tx_q->dma_etx = addr; - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - tx_q->dma_entx = addr; - else - tx_q->dma_tx = addr; + return 0; +} + +static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) +{ + u32 tx_count = priv->plat->tx_queues_to_use; + u32 queue; + int ret; + + /* TX queues buffers and DMA */ + for (queue = 0; queue < tx_count; queue++) { + ret = __alloc_dma_tx_desc_resources(priv, queue); + if (ret) + goto err_dma; } return 0; @@ -2182,12 +2315,24 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) /* configure all channels */ for (chan = 0; chan < rx_channels_count; chan++) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; + u32 buf_size; + qmode = priv->plat->rx_queues_cfg[chan].mode_to_use; stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz, qmode); - stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz, - chan); + + if (rx_q->xsk_pool) { + buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); + stmmac_set_dma_bfsize(priv, priv->ioaddr, + buf_size, + chan); + } else { + stmmac_set_dma_bfsize(priv, priv->ioaddr, + priv->dma_buf_sz, + chan); + } } for (chan = 0; chan < tx_channels_count; chan++) { @@ -2198,6 +2343,101 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) } } +static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) +{ + struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue); + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + struct xsk_buff_pool *pool = tx_q->xsk_pool; + unsigned int entry = tx_q->cur_tx; + struct dma_desc *tx_desc = NULL; + struct xdp_desc xdp_desc; + bool work_done = true; + + /* Avoids TX time-out as we are sharing with slow path */ + nq->trans_start = jiffies; + + budget = min(budget, stmmac_tx_avail(priv, queue)); + + while (budget-- > 0) { + dma_addr_t dma_addr; + bool set_ic; + + /* We are sharing with slow path and stop XSK TX desc submission when + * available TX ring is less than threshold. + */ + if (unlikely(stmmac_tx_avail(priv, queue) < STMMAC_TX_XSK_AVAIL) || + !netif_carrier_ok(priv->dev)) { + work_done = false; + break; + } + + if (!xsk_tx_peek_desc(pool, &xdp_desc)) + break; + + if (likely(priv->extend_desc)) + tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + tx_desc = &tx_q->dma_entx[entry].basic; + else + tx_desc = tx_q->dma_tx + entry; + + dma_addr = xsk_buff_raw_get_dma(pool, xdp_desc.addr); + xsk_buff_raw_dma_sync_for_device(pool, dma_addr, xdp_desc.len); + + tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XSK_TX; + + /* To return XDP buffer to XSK pool, we simple call + * xsk_tx_completed(), so we don't need to fill up + * 'buf' and 'xdpf'. + */ + tx_q->tx_skbuff_dma[entry].buf = 0; + tx_q->xdpf[entry] = NULL; + + tx_q->tx_skbuff_dma[entry].map_as_page = false; + tx_q->tx_skbuff_dma[entry].len = xdp_desc.len; + tx_q->tx_skbuff_dma[entry].last_segment = true; + tx_q->tx_skbuff_dma[entry].is_jumbo = false; + + stmmac_set_desc_addr(priv, tx_desc, dma_addr); + + tx_q->tx_count_frames++; + + if (!priv->tx_coal_frames[queue]) + set_ic = false; + else if (tx_q->tx_count_frames % priv->tx_coal_frames[queue] == 0) + set_ic = true; + else + set_ic = false; + + if (set_ic) { + tx_q->tx_count_frames = 0; + stmmac_set_tx_ic(priv, tx_desc); + priv->xstats.tx_set_ic_bit++; + } + + stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len, + true, priv->mode, true, true, + xdp_desc.len); + + stmmac_enable_dma_transmission(priv, priv->ioaddr); + + tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_tx_size); + entry = tx_q->cur_tx; + } + + if (tx_desc) { + stmmac_flush_tx_descriptors(priv, queue); + xsk_tx_release(pool); + } + + /* Return true if all of the 3 conditions are met + * a) TX Budget is still available + * b) work_done = true when XSK TX desc peek is empty (no more + * pending XSK TX for transmission) + */ + return !!budget && work_done; +} + /** * stmmac_tx_clean - to manage the transmission completion * @priv: driver private structure @@ -2209,14 +2449,18 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; unsigned int bytes_compl = 0, pkts_compl = 0; - unsigned int entry, count = 0; + unsigned int entry, xmits = 0, count = 0; __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue)); priv->xstats.tx_clean++; + tx_q->xsk_frames_done = 0; + entry = tx_q->dirty_tx; - while ((entry != tx_q->cur_tx) && (count < budget)) { + + /* Try to clean all TX complete frame in 1 shot */ + while ((entry != tx_q->cur_tx) && count < priv->dma_tx_size) { struct xdp_frame *xdpf; struct sk_buff *skb; struct dma_desc *p; @@ -2301,6 +2545,9 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) tx_q->xdpf[entry] = NULL; } + if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XSK_TX) + tx_q->xsk_frames_done++; + if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) { if (likely(skb)) { pkts_compl++; @@ -2328,6 +2575,28 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, queue)); } + if (tx_q->xsk_pool) { + bool work_done; + + if (tx_q->xsk_frames_done) + xsk_tx_completed(tx_q->xsk_pool, tx_q->xsk_frames_done); + + if (xsk_uses_need_wakeup(tx_q->xsk_pool)) + xsk_set_tx_need_wakeup(tx_q->xsk_pool); + + /* For XSK TX, we try to send as many as possible. + * If XSK work done (XSK TX desc empty and budget still + * available), return "budget - 1" to reenable TX IRQ. + * Else, return "budget" to make NAPI continue polling. + */ + work_done = stmmac_xdp_xmit_zc(priv, queue, + STMMAC_XSK_TX_BUDGET_MAX); + if (work_done) + xmits = budget - 1; + else + xmits = budget; + } + if (priv->eee_enabled && !priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) { stmmac_enable_eee_mode(priv); @@ -2342,7 +2611,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); - return count; + /* Combine decisions from TX clean and XSK TX */ + return max(count, xmits); } /** @@ -2424,24 +2694,31 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan, u32 dir) { int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, &priv->xstats, chan, dir); + struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; struct stmmac_channel *ch = &priv->channel[chan]; + struct napi_struct *rx_napi; + struct napi_struct *tx_napi; unsigned long flags; + rx_napi = rx_q->xsk_pool ? &ch->rxtx_napi : &ch->rx_napi; + tx_napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi; + if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) { - if (napi_schedule_prep(&ch->rx_napi)) { + if (napi_schedule_prep(rx_napi)) { spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule(&ch->rx_napi); + __napi_schedule(rx_napi); } } if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) { - if (napi_schedule_prep(&ch->tx_napi)) { + if (napi_schedule_prep(tx_napi)) { spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule(&ch->tx_napi); + __napi_schedule(tx_napi); } } @@ -2598,7 +2875,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) rx_q->dma_rx_phy, chan); rx_q->rx_tail_addr = rx_q->dma_rx_phy + - (priv->dma_rx_size * + (rx_q->buf_alloc_num * sizeof(struct dma_desc)); stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, chan); @@ -2639,16 +2916,18 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t) struct stmmac_tx_queue *tx_q = container_of(t, struct stmmac_tx_queue, txtimer); struct stmmac_priv *priv = tx_q->priv_data; struct stmmac_channel *ch; + struct napi_struct *napi; ch = &priv->channel[tx_q->queue_index]; + napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi; - if (likely(napi_schedule_prep(&ch->tx_napi))) { + if (likely(napi_schedule_prep(napi))) { unsigned long flags; spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, ch->index, 0, 1); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule(&ch->tx_napi); + __napi_schedule(napi); } return HRTIMER_NORESTART; @@ -4368,20 +4647,13 @@ static int stmmac_xdp_xmit_back(struct stmmac_priv *priv, return res; } -static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv, - struct xdp_buff *xdp) +/* This function assumes rcu_read_lock() is held by the caller. */ +static int __stmmac_xdp_run_prog(struct stmmac_priv *priv, + struct bpf_prog *prog, + struct xdp_buff *xdp) { - struct bpf_prog *prog; - int res; u32 act; - - rcu_read_lock(); - - prog = READ_ONCE(priv->xdp_prog); - if (!prog) { - res = STMMAC_XDP_PASS; - goto unlock; - } + int res; act = bpf_prog_run_xdp(prog, xdp); switch (act) { @@ -4408,6 +4680,24 @@ static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv, break; } + return res; +} + +static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv, + struct xdp_buff *xdp) +{ + struct bpf_prog *prog; + int res; + + rcu_read_lock(); + + prog = READ_ONCE(priv->xdp_prog); + if (!prog) { + res = STMMAC_XDP_PASS; + goto unlock; + } + + res = __stmmac_xdp_run_prog(priv, prog, xdp); unlock: rcu_read_unlock(); return ERR_PTR(-res); @@ -4428,6 +4718,302 @@ static void stmmac_finalize_xdp_rx(struct stmmac_priv *priv, xdp_do_flush(); } +static struct sk_buff *stmmac_construct_skb_zc(struct stmmac_channel *ch, + struct xdp_buff *xdp) +{ + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; + struct sk_buff *skb; + + skb = __napi_alloc_skb(&ch->rxtx_napi, + xdp->data_end - xdp->data_hard_start, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, xdp->data - xdp->data_hard_start); + memcpy(__skb_put(skb, datasize), xdp->data, datasize); + if (metasize) + skb_metadata_set(skb, metasize); + + return skb; +} + +static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, + struct dma_desc *p, struct dma_desc *np, + struct xdp_buff *xdp) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned int len = xdp->data_end - xdp->data; + enum pkt_hash_types hash_type; + int coe = priv->hw->rx_csum; + struct sk_buff *skb; + u32 hash; + + skb = stmmac_construct_skb_zc(ch, xdp); + if (!skb) { + priv->dev->stats.rx_dropped++; + return; + } + + stmmac_get_rx_hwtstamp(priv, p, np, skb); + stmmac_rx_vlan(priv->dev, skb); + skb->protocol = eth_type_trans(skb, priv->dev); + + if (unlikely(!coe)) + skb_checksum_none_assert(skb); + else + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type)) + skb_set_hash(skb, hash, hash_type); + + skb_record_rx_queue(skb, queue); + napi_gro_receive(&ch->rxtx_napi, skb); + + priv->dev->stats.rx_packets++; + priv->dev->stats.rx_bytes += len; +} + +static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + unsigned int entry = rx_q->dirty_rx; + struct dma_desc *rx_desc = NULL; + bool ret = true; + + budget = min(budget, stmmac_rx_dirty(priv, queue)); + + while (budget-- > 0 && entry != rx_q->cur_rx) { + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry]; + dma_addr_t dma_addr; + bool use_rx_wd; + + if (!buf->xdp) { + buf->xdp = xsk_buff_alloc(rx_q->xsk_pool); + if (!buf->xdp) { + ret = false; + break; + } + } + + if (priv->extend_desc) + rx_desc = (struct dma_desc *)(rx_q->dma_erx + entry); + else + rx_desc = rx_q->dma_rx + entry; + + dma_addr = xsk_buff_xdp_get_dma(buf->xdp); + stmmac_set_desc_addr(priv, rx_desc, dma_addr); + stmmac_set_desc_sec_addr(priv, rx_desc, 0, false); + stmmac_refill_desc3(priv, rx_q, rx_desc); + + rx_q->rx_count_frames++; + rx_q->rx_count_frames += priv->rx_coal_frames[queue]; + if (rx_q->rx_count_frames > priv->rx_coal_frames[queue]) + rx_q->rx_count_frames = 0; + + use_rx_wd = !priv->rx_coal_frames[queue]; + use_rx_wd |= rx_q->rx_count_frames > 0; + if (!priv->use_riwt) + use_rx_wd = false; + + dma_wmb(); + stmmac_set_rx_owner(priv, rx_desc, use_rx_wd); + + entry = STMMAC_GET_ENTRY(entry, priv->dma_rx_size); + } + + if (rx_desc) { + rx_q->dirty_rx = entry; + rx_q->rx_tail_addr = rx_q->dma_rx_phy + + (rx_q->dirty_rx * sizeof(struct dma_desc)); + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue); + } + + return ret; +} + +static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + unsigned int count = 0, error = 0, len = 0; + int dirty = stmmac_rx_dirty(priv, queue); + unsigned int next_entry = rx_q->cur_rx; + unsigned int desc_size; + struct bpf_prog *prog; + bool failure = false; + int xdp_status = 0; + int status = 0; + + if (netif_msg_rx_status(priv)) { + void *rx_head; + + netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__); + if (priv->extend_desc) { + rx_head = (void *)rx_q->dma_erx; + desc_size = sizeof(struct dma_extended_desc); + } else { + rx_head = (void *)rx_q->dma_rx; + desc_size = sizeof(struct dma_desc); + } + + stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true, + rx_q->dma_rx_phy, desc_size); + } + while (count < limit) { + struct stmmac_rx_buffer *buf; + unsigned int buf1_len = 0; + struct dma_desc *np, *p; + int entry; + int res; + + if (!count && rx_q->state_saved) { + error = rx_q->state.error; + len = rx_q->state.len; + } else { + rx_q->state_saved = false; + error = 0; + len = 0; + } + + if (count >= limit) + break; + +read_again: + buf1_len = 0; + entry = next_entry; + buf = &rx_q->buf_pool[entry]; + + if (dirty >= STMMAC_RX_FILL_BATCH) { + failure = failure || + !stmmac_rx_refill_zc(priv, queue, dirty); + dirty = 0; + } + + if (priv->extend_desc) + p = (struct dma_desc *)(rx_q->dma_erx + entry); + else + p = rx_q->dma_rx + entry; + + /* read the status of the incoming frame */ + status = stmmac_rx_status(priv, &priv->dev->stats, + &priv->xstats, p); + /* check if managed by the DMA otherwise go ahead */ + if (unlikely(status & dma_own)) + break; + + /* Prefetch the next RX descriptor */ + rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, + priv->dma_rx_size); + next_entry = rx_q->cur_rx; + + if (priv->extend_desc) + np = (struct dma_desc *)(rx_q->dma_erx + next_entry); + else + np = rx_q->dma_rx + next_entry; + + prefetch(np); + + if (priv->extend_desc) + stmmac_rx_extended_status(priv, &priv->dev->stats, + &priv->xstats, + rx_q->dma_erx + entry); + if (unlikely(status == discard_frame)) { + xsk_buff_free(buf->xdp); + buf->xdp = NULL; + dirty++; + error = 1; + if (!priv->hwts_rx_en) + priv->dev->stats.rx_errors++; + } + + if (unlikely(error && (status & rx_not_ls))) + goto read_again; + if (unlikely(error)) { + count++; + continue; + } + + /* Ensure a valid XSK buffer before proceed */ + if (!buf->xdp) + break; + + /* XSK pool expects RX frame 1:1 mapped to XSK buffer */ + if (likely(status & rx_not_ls)) { + xsk_buff_free(buf->xdp); + buf->xdp = NULL; + dirty++; + count++; + goto read_again; + } + + /* XDP ZC Frame only support primary buffers for now */ + buf1_len = stmmac_rx_buf1_len(priv, p, status, len); + len += buf1_len; + + /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 + * Type frames (LLC/LLC-SNAP) + * + * llc_snap is never checked in GMAC >= 4, so this ACS + * feature is always disabled and packets need to be + * stripped manually. + */ + if (likely(!(status & rx_not_ls)) && + (likely(priv->synopsys_id >= DWMAC_CORE_4_00) || + unlikely(status != llc_snap))) { + buf1_len -= ETH_FCS_LEN; + len -= ETH_FCS_LEN; + } + + /* RX buffer is good and fit into a XSK pool buffer */ + buf->xdp->data_end = buf->xdp->data + buf1_len; + xsk_buff_dma_sync_for_cpu(buf->xdp, rx_q->xsk_pool); + + rcu_read_lock(); + prog = READ_ONCE(priv->xdp_prog); + res = __stmmac_xdp_run_prog(priv, prog, buf->xdp); + rcu_read_unlock(); + + switch (res) { + case STMMAC_XDP_PASS: + stmmac_dispatch_skb_zc(priv, queue, p, np, buf->xdp); + xsk_buff_free(buf->xdp); + break; + case STMMAC_XDP_CONSUMED: + xsk_buff_free(buf->xdp); + priv->dev->stats.rx_dropped++; + break; + case STMMAC_XDP_TX: + case STMMAC_XDP_REDIRECT: + xdp_status |= res; + break; + } + + buf->xdp = NULL; + dirty++; + count++; + } + + if (status & rx_not_ls) { + rx_q->state_saved = true; + rx_q->state.error = error; + rx_q->state.len = len; + } + + stmmac_finalize_xdp_rx(priv, xdp_status); + + if (xsk_uses_need_wakeup(rx_q->xsk_pool)) { + if (failure || stmmac_rx_dirty(priv, queue) > 0) + xsk_set_rx_need_wakeup(rx_q->xsk_pool); + else + xsk_clear_rx_need_wakeup(rx_q->xsk_pool); + + return (int)count; + } + + return failure ? limit : (int)count; +} + /** * stmmac_rx - manage the receive process * @priv: driver private structure @@ -4742,7 +5328,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) priv->xstats.napi_poll++; - work_done = stmmac_tx_clean(priv, priv->dma_tx_size, chan); + work_done = stmmac_tx_clean(priv, budget, chan); work_done = min(work_done, budget); if (work_done < budget && napi_complete_done(napi, work_done)) { @@ -4756,6 +5342,42 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) return work_done; } +static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget) +{ + struct stmmac_channel *ch = + container_of(napi, struct stmmac_channel, rxtx_napi); + struct stmmac_priv *priv = ch->priv_data; + int rx_done, tx_done; + u32 chan = ch->index; + + priv->xstats.napi_poll++; + + tx_done = stmmac_tx_clean(priv, budget, chan); + tx_done = min(tx_done, budget); + + rx_done = stmmac_rx_zc(priv, budget, chan); + + /* If either TX or RX work is not complete, return budget + * and keep pooling + */ + if (tx_done >= budget || rx_done >= budget) + return budget; + + /* all work done, exit the polling mode */ + if (napi_complete_done(napi, rx_done)) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + /* Both RX and TX work done are compelte, + * so enable both RX & TX IRQs. + */ + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 1); + spin_unlock_irqrestore(&ch->lock, flags); + } + + return min(rx_done, budget - 1); +} + /** * stmmac_tx_timeout * @dev : Pointer to net device structure @@ -4989,6 +5611,8 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv) else netif_carrier_off(priv->dev); } + + stmmac_timestamp_interrupt(priv, priv); } } @@ -5203,7 +5827,7 @@ static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data, if (!tc_cls_can_offload_and_chain0(priv->dev, type_data)) return ret; - stmmac_disable_all_queues(priv); + __stmmac_disable_all_queues(priv); switch (type) { case TC_SETUP_CLSU32: @@ -5624,6 +6248,9 @@ static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf) switch (bpf->command) { case XDP_SETUP_PROG: return stmmac_xdp_set_prog(priv, bpf->prog, bpf->extack); + case XDP_SETUP_XSK_POOL: + return stmmac_xdp_setup_pool(priv, bpf->xsk.pool, + bpf->xsk.queue_id); default: return -EOPNOTSUPP; } @@ -5671,6 +6298,156 @@ static int stmmac_xdp_xmit(struct net_device *dev, int num_frames, return nxmit; } +void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, queue, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); + + stmmac_stop_rx_dma(priv, queue); + __free_dma_rx_desc_resources(priv, queue); +} + +void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned long flags; + u32 buf_size; + int ret; + + ret = __alloc_dma_rx_desc_resources(priv, queue); + if (ret) { + netdev_err(priv->dev, "Failed to alloc RX desc.\n"); + return; + } + + ret = __init_dma_rx_desc_rings(priv, queue, GFP_KERNEL); + if (ret) { + __free_dma_rx_desc_resources(priv, queue); + netdev_err(priv->dev, "Failed to init RX desc.\n"); + return; + } + + stmmac_clear_rx_descriptors(priv, queue); + + stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + rx_q->dma_rx_phy, rx_q->queue_index); + + rx_q->rx_tail_addr = rx_q->dma_rx_phy + (rx_q->buf_alloc_num * + sizeof(struct dma_desc)); + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, + rx_q->rx_tail_addr, rx_q->queue_index); + + if (rx_q->xsk_pool && rx_q->buf_alloc_num) { + buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); + stmmac_set_dma_bfsize(priv, priv->ioaddr, + buf_size, + rx_q->queue_index); + } else { + stmmac_set_dma_bfsize(priv, priv->ioaddr, + priv->dma_buf_sz, + rx_q->queue_index); + } + + stmmac_start_rx_dma(priv, queue); + + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, queue, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); +} + +void stmmac_disable_tx_queue(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, queue, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); + + stmmac_stop_tx_dma(priv, queue); + __free_dma_tx_desc_resources(priv, queue); +} + +void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned long flags; + int ret; + + ret = __alloc_dma_tx_desc_resources(priv, queue); + if (ret) { + netdev_err(priv->dev, "Failed to alloc TX desc.\n"); + return; + } + + ret = __init_dma_tx_desc_rings(priv, queue); + if (ret) { + __free_dma_tx_desc_resources(priv, queue); + netdev_err(priv->dev, "Failed to init TX desc.\n"); + return; + } + + stmmac_clear_tx_descriptors(priv, queue); + + stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + tx_q->dma_tx_phy, tx_q->queue_index); + + if (tx_q->tbs & STMMAC_TBS_AVAIL) + stmmac_enable_tbs(priv, priv->ioaddr, 1, tx_q->queue_index); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy; + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, + tx_q->tx_tail_addr, tx_q->queue_index); + + stmmac_start_tx_dma(priv, queue); + + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, queue, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); +} + +int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct stmmac_rx_queue *rx_q; + struct stmmac_tx_queue *tx_q; + struct stmmac_channel *ch; + + if (test_bit(STMMAC_DOWN, &priv->state) || + !netif_carrier_ok(priv->dev)) + return -ENETDOWN; + + if (!stmmac_xdp_is_enabled(priv)) + return -ENXIO; + + if (queue >= priv->plat->rx_queues_to_use || + queue >= priv->plat->tx_queues_to_use) + return -EINVAL; + + rx_q = &priv->rx_queue[queue]; + tx_q = &priv->tx_queue[queue]; + ch = &priv->channel[queue]; + + if (!rx_q->xsk_pool && !tx_q->xsk_pool) + return -ENXIO; + + if (!napi_if_scheduled_mark_missed(&ch->rxtx_napi)) { + /* EQoS does not have per-DMA channel SW interrupt, + * so we schedule RX Napi straight-away. + */ + if (likely(napi_schedule_prep(&ch->rxtx_napi))) + __napi_schedule(&ch->rxtx_napi); + } + + return 0; +} + static const struct net_device_ops stmmac_netdev_ops = { .ndo_open = stmmac_open, .ndo_start_xmit = stmmac_xmit, @@ -5691,6 +6468,7 @@ static const struct net_device_ops stmmac_netdev_ops = { .ndo_vlan_rx_kill_vid = stmmac_vlan_rx_kill_vid, .ndo_bpf = stmmac_bpf, .ndo_xdp_xmit = stmmac_xdp_xmit, + .ndo_xsk_wakeup = stmmac_xsk_wakeup, }; static void stmmac_reset_subtask(struct stmmac_priv *priv) @@ -5849,6 +6627,12 @@ static void stmmac_napi_add(struct net_device *dev) stmmac_napi_poll_tx, NAPI_POLL_WEIGHT); } + if (queue < priv->plat->rx_queues_to_use && + queue < priv->plat->tx_queues_to_use) { + netif_napi_add(dev, &ch->rxtx_napi, + stmmac_napi_poll_rxtx, + NAPI_POLL_WEIGHT); + } } } @@ -5866,6 +6650,10 @@ static void stmmac_napi_del(struct net_device *dev) netif_napi_del(&ch->rx_napi); if (queue < priv->plat->tx_queues_to_use) netif_napi_del(&ch->tx_napi); + if (queue < priv->plat->rx_queues_to_use && + queue < priv->plat->tx_queues_to_use) { + netif_napi_del(&ch->rxtx_napi); + } } } @@ -6016,7 +6804,7 @@ int stmmac_dvr_probe(struct device *device, for (i = 0; i < MTL_MAX_TX_QUEUES; i++) priv->tx_irq[i] = res->tx_irq[i]; - if (!IS_ERR_OR_NULL(res->mac)) + if (!is_zero_ether_addr(res->mac)) memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN); dev_set_drvdata(device, priv->dev); @@ -6024,6 +6812,10 @@ int stmmac_dvr_probe(struct device *device, /* Verify driver arguments */ stmmac_verify_args(); + priv->af_xdp_zc_qps = bitmap_zalloc(MTL_MAX_TX_QUEUES, GFP_KERNEL); + if (!priv->af_xdp_zc_qps) + return -ENOMEM; + /* Allocate workqueue */ priv->wq = create_singlethread_workqueue("stmmac_wq"); if (!priv->wq) { @@ -6243,6 +7035,7 @@ error_mdio_register: error_hw_init: destroy_workqueue(priv->wq); stmmac_bus_clks_config(priv, false); + bitmap_free(priv->af_xdp_zc_qps); return ret; } @@ -6285,6 +7078,7 @@ int stmmac_dvr_remove(struct device *dev) stmmac_mdio_unregister(ndev); destroy_workqueue(priv->wq); mutex_destroy(&priv->lock); + bitmap_free(priv->af_xdp_zc_qps); return 0; } @@ -6456,7 +7250,7 @@ int stmmac_resume(struct device *dev) mutex_lock(&priv->lock); stmmac_reset_queues_param(priv); - stmmac_reinit_rx_buffers(priv); + stmmac_free_tx_skbufs(priv); stmmac_clear_descriptors(priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 5a1e018884e6..1e17a23d9118 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -394,7 +394,7 @@ static int stmmac_of_get_mac_mode(struct device_node *np) * set some private fields that will be used by the main at runtime. */ struct plat_stmmacenet_data * -stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) +stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) { struct device_node *np = pdev->dev.of_node; struct plat_stmmacenet_data *plat; @@ -406,12 +406,12 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) if (!plat) return ERR_PTR(-ENOMEM); - *mac = of_get_mac_address(np); - if (IS_ERR(*mac)) { - if (PTR_ERR(*mac) == -EPROBE_DEFER) - return ERR_CAST(*mac); + rc = of_get_mac_address(np, mac); + if (rc) { + if (rc == -EPROBE_DEFER) + return ERR_PTR(rc); - *mac = NULL; + eth_zero_addr(mac); } plat->phy_interface = device_get_phy_mode(&pdev->dev); @@ -627,7 +627,7 @@ void stmmac_remove_config_dt(struct platform_device *pdev, } #else struct plat_stmmacenet_data * -stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) +stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) { return ERR_PTR(-EINVAL); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index 3a4663b7b460..3fff3f59d73d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -12,7 +12,7 @@ #include "stmmac.h" struct plat_stmmacenet_data * -stmmac_probe_config_dt(struct platform_device *pdev, const char **mac); +stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index b164ae22e35f..4e86cdf2bc9f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -135,7 +135,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, { struct stmmac_priv *priv = container_of(ptp, struct stmmac_priv, ptp_clock_ops); + void __iomem *ptpaddr = priv->ptpaddr; + void __iomem *ioaddr = priv->hw->pcsr; struct stmmac_pps_cfg *cfg; + u32 intr_value, acr_value; int ret = -EOPNOTSUPP; unsigned long flags; @@ -159,6 +162,37 @@ static int stmmac_enable(struct ptp_clock_info *ptp, priv->systime_flags); spin_unlock_irqrestore(&priv->ptp_lock, flags); break; + case PTP_CLK_REQ_EXTTS: + priv->plat->ext_snapshot_en = on; + mutex_lock(&priv->aux_ts_lock); + acr_value = readl(ptpaddr + PTP_ACR); + acr_value &= ~PTP_ACR_MASK; + if (on) { + /* Enable External snapshot trigger */ + acr_value |= priv->plat->ext_snapshot_num; + acr_value |= PTP_ACR_ATSFC; + netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n", + priv->plat->ext_snapshot_num >> + PTP_ACR_ATSEN_SHIFT); + /* Enable Timestamp Interrupt */ + intr_value = readl(ioaddr + GMAC_INT_EN); + intr_value |= GMAC_INT_TSIE; + writel(intr_value, ioaddr + GMAC_INT_EN); + + } else { + netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n", + priv->plat->ext_snapshot_num >> + PTP_ACR_ATSEN_SHIFT); + /* Disable Timestamp Interrupt */ + intr_value = readl(ioaddr + GMAC_INT_EN); + intr_value &= ~GMAC_INT_TSIE; + writel(intr_value, ioaddr + GMAC_INT_EN); + } + writel(acr_value, ptpaddr + PTP_ACR); + mutex_unlock(&priv->aux_ts_lock); + ret = 0; + break; + default: break; } @@ -202,7 +236,7 @@ static struct ptp_clock_info stmmac_ptp_clock_ops = { .name = "stmmac ptp", .max_adj = 62500000, .n_alarm = 0, - .n_ext_ts = 0, + .n_ext_ts = 0, /* will be overwritten in stmmac_ptp_register */ .n_per_out = 0, /* will be overwritten in stmmac_ptp_register */ .n_pins = 0, .pps = 0, @@ -237,8 +271,10 @@ void stmmac_ptp_register(struct stmmac_priv *priv) stmmac_ptp_clock_ops.max_adj = priv->plat->ptp_max_adj; stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num; + stmmac_ptp_clock_ops.n_ext_ts = priv->dma_cap.aux_snapshot_n; spin_lock_init(&priv->ptp_lock); + mutex_init(&priv->aux_ts_lock); priv->ptp_clock_ops = stmmac_ptp_clock_ops; priv->ptp_clock = ptp_clock_register(&priv->ptp_clock_ops, @@ -264,4 +300,6 @@ void stmmac_ptp_unregister(struct stmmac_priv *priv) pr_debug("Removed PTP HW clock successfully on %s\n", priv->dev->name); } + + mutex_destroy(&priv->aux_ts_lock); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h index f88727ce4d30..53172a439810 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h @@ -73,6 +73,7 @@ #define PTP_ACR_ATSEN1 BIT(5) /* Auxiliary Snapshot 1 Enable */ #define PTP_ACR_ATSEN2 BIT(6) /* Auxiliary Snapshot 2 Enable */ #define PTP_ACR_ATSEN3 BIT(7) /* Auxiliary Snapshot 3 Enable */ +#define PTP_ACR_ATSEN_SHIFT 5 /* Auxiliary Snapshot shift */ #define PTP_ACR_MASK GENMASK(7, 4) /* Aux Snapshot Mask */ #define PMC_ART_VALUE0 0x01 /* PMC_ART[15:0] timer value */ #define PMC_ART_VALUE1 0x02 /* PMC_ART[31:16] timer value */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c index bf38d231860b..105821b53020 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c @@ -1,9 +1,104 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021, Intel Corporation. */ +#include <net/xdp_sock_drv.h> + #include "stmmac.h" #include "stmmac_xdp.h" +static int stmmac_xdp_enable_pool(struct stmmac_priv *priv, + struct xsk_buff_pool *pool, u16 queue) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + bool need_update; + u32 frame_size; + int err; + + if (queue >= priv->plat->rx_queues_to_use || + queue >= priv->plat->tx_queues_to_use) + return -EINVAL; + + frame_size = xsk_pool_get_rx_frame_size(pool); + /* XDP ZC does not span multiple frame, make sure XSK pool buffer + * size can at least store Q-in-Q frame. + */ + if (frame_size < ETH_FRAME_LEN + VLAN_HLEN * 2) + return -EOPNOTSUPP; + + err = xsk_pool_dma_map(pool, priv->device, STMMAC_RX_DMA_ATTR); + if (err) { + netdev_err(priv->dev, "Failed to map xsk pool\n"); + return err; + } + + need_update = netif_running(priv->dev) && stmmac_xdp_is_enabled(priv); + + if (need_update) { + stmmac_disable_rx_queue(priv, queue); + stmmac_disable_tx_queue(priv, queue); + napi_disable(&ch->rx_napi); + napi_disable(&ch->tx_napi); + } + + set_bit(queue, priv->af_xdp_zc_qps); + + if (need_update) { + napi_enable(&ch->rxtx_napi); + stmmac_enable_rx_queue(priv, queue); + stmmac_enable_tx_queue(priv, queue); + + err = stmmac_xsk_wakeup(priv->dev, queue, XDP_WAKEUP_RX); + if (err) + return err; + } + + return 0; +} + +static int stmmac_xdp_disable_pool(struct stmmac_priv *priv, u16 queue) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + struct xsk_buff_pool *pool; + bool need_update; + + if (queue >= priv->plat->rx_queues_to_use || + queue >= priv->plat->tx_queues_to_use) + return -EINVAL; + + pool = xsk_get_pool_from_qid(priv->dev, queue); + if (!pool) + return -EINVAL; + + need_update = netif_running(priv->dev) && stmmac_xdp_is_enabled(priv); + + if (need_update) { + stmmac_disable_rx_queue(priv, queue); + stmmac_disable_tx_queue(priv, queue); + synchronize_rcu(); + napi_disable(&ch->rxtx_napi); + } + + xsk_pool_dma_unmap(pool, STMMAC_RX_DMA_ATTR); + + clear_bit(queue, priv->af_xdp_zc_qps); + + if (need_update) { + napi_enable(&ch->rx_napi); + napi_enable(&ch->tx_napi); + stmmac_enable_rx_queue(priv, queue); + stmmac_enable_tx_queue(priv, queue); + } + + return 0; +} + +int stmmac_xdp_setup_pool(struct stmmac_priv *priv, struct xsk_buff_pool *pool, + u16 queue) +{ + return pool ? stmmac_xdp_enable_pool(priv, pool, queue) : + stmmac_xdp_disable_pool(priv, queue); +} + int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog, struct netlink_ext_ack *extack) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h index 93948569d92a..896dc987d4ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h @@ -5,7 +5,10 @@ #define _STMMAC_XDP_H_ #define STMMAC_MAX_RX_BUF_SIZE(num) (((num) * PAGE_SIZE) - XDP_PACKET_HEADROOM) +#define STMMAC_RX_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) +int stmmac_xdp_setup_pool(struct stmmac_priv *priv, struct xsk_buff_pool *pool, + u16 queue); int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 638d7b03be4b..6a67b026df0b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1824,7 +1824,6 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) for_each_child_of_node(node, port_np) { struct am65_cpsw_port *port; - const void *mac_addr; u32 port_id; /* it is not a slave port node, continue */ @@ -1903,15 +1902,15 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) return ret; } - mac_addr = of_get_mac_address(port_np); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(port->slave.mac_addr, mac_addr); - } else if (am65_cpsw_am654_get_efuse_macid(port_np, - port->port_id, - port->slave.mac_addr) || - !is_valid_ether_addr(port->slave.mac_addr)) { - random_ether_addr(port->slave.mac_addr); - dev_err(dev, "Use random MAC address\n"); + ret = of_get_mac_address(port_np, port->slave.mac_addr); + if (ret) { + am65_cpsw_am654_get_efuse_macid(port_np, + port->port_id, + port->slave.mac_addr); + if (!is_valid_ether_addr(port->slave.mac_addr)) { + random_ether_addr(port->slave.mac_addr); + dev_err(dev, "Use random MAC address\n"); + } } } of_node_put(node); diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c index d93ffd8a08b0..23cfb91e9c4d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c +++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c @@ -385,7 +385,7 @@ static void am65_cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port_id); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port_id = HOST_PORT_NUM; @@ -401,7 +401,7 @@ static void am65_cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port_id); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port_id = HOST_PORT_NUM; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 074702af3dc6..c0cd7de88316 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1296,7 +1296,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, for_each_available_child_of_node(node, slave_node) { struct cpsw_slave_data *slave_data = data->slave_data + i; - const void *mac_addr = NULL; int lenp; const __be32 *parp; @@ -1368,10 +1367,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } no_phy_slave: - mac_addr = of_get_mac_address(slave_node); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(slave_data->mac_addr, mac_addr); - } else { + ret = of_get_mac_address(slave_node, slave_data->mac_addr); + if (ret) { ret = ti_cm_get_macid(&pdev->dev, i, slave_data->mac_addr); if (ret) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 0751f77de2c7..69b7a4e0220a 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1257,7 +1257,6 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw) for_each_child_of_node(tmp_node, port_np) { struct cpsw_slave_data *slave_data; - const void *mac_addr; u32 port_id; ret = of_property_read_u32(port_np, "reg", &port_id); @@ -1316,10 +1315,8 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw) goto err_node_put; } - mac_addr = of_get_mac_address(port_np); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(slave_data->mac_addr, mac_addr); - } else { + ret = of_get_mac_address(port_np, slave_data->mac_addr); + if (ret) { ret = ti_cm_get_macid(dev, port_id - 1, slave_data->mac_addr); if (ret) diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index a72bb570756f..05a64fb7a04f 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -395,7 +395,7 @@ static void cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port = HOST_PORT_NUM; @@ -411,7 +411,7 @@ static void cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port = HOST_PORT_NUM; diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index c7031e1960d4..14e7da7d302f 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1687,7 +1687,6 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) const struct of_device_id *match; const struct emac_platform_data *auxdata; struct emac_platform_data *pdata = NULL; - const u8 *mac_addr; if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node) return dev_get_platdata(&pdev->dev); @@ -1699,11 +1698,8 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) np = pdev->dev.of_node; pdata->version = EMAC_VERSION_2; - if (!is_valid_ether_addr(pdata->mac_addr)) { - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(pdata->mac_addr, mac_addr); - } + if (!is_valid_ether_addr(pdata->mac_addr)) + of_get_mac_address(np, pdata->mac_addr); of_property_read_u32(np, "ti,davinci-ctrl-reg-offset", &pdata->ctrl_reg_offset); diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index d7a144b4a09f..9030e619e543 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1966,7 +1966,6 @@ static int netcp_create_interface(struct netcp_device *netcp_device, struct resource res; void __iomem *efuse = NULL; u32 efuse_mac = 0; - const void *mac_addr; u8 efuse_mac_addr[6]; u32 temp[2]; int ret = 0; @@ -2036,10 +2035,8 @@ static int netcp_create_interface(struct netcp_device *netcp_device, devm_iounmap(dev, efuse); devm_release_mem_region(dev, res.start, size); } else { - mac_addr = of_get_mac_address(node_interface); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - else + ret = of_get_mac_address(node_interface, ndev->dev_addr); + if (ret) eth_random_addr(ndev->dev_addr); } diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c index 2b4126d2427d..2b84848dc26a 100644 --- a/drivers/net/ethernet/wiznet/w5100-spi.c +++ b/drivers/net/ethernet/wiznet/w5100-spi.c @@ -423,8 +423,14 @@ static int w5100_spi_probe(struct spi_device *spi) const struct of_device_id *of_id; const struct w5100_ops *ops; kernel_ulong_t driver_data; + const void *mac = NULL; + u8 tmpmac[ETH_ALEN]; int priv_size; - const void *mac = of_get_mac_address(spi->dev.of_node); + int ret; + + ret = of_get_mac_address(spi->dev.of_node, tmpmac); + if (!ret) + mac = tmpmac; if (spi->dev.of_node) { of_id = of_match_device(w5100_of_match, &spi->dev); diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index c0d181a7f83a..ec5db481c9cd 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -1157,7 +1157,7 @@ int w5100_probe(struct device *dev, const struct w5100_ops *ops, INIT_WORK(&priv->setrx_work, w5100_setrx_work); INIT_WORK(&priv->restart_work, w5100_restart_work); - if (!IS_ERR_OR_NULL(mac_addr)) + if (mac_addr) memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); else eth_hw_addr_random(ndev); diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index c6eb7f2368aa..911b5ef9e680 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -18,12 +18,14 @@ if NET_VENDOR_XILINX config XILINX_EMACLITE tristate "Xilinx 10/100 Ethernet Lite support" + depends on HAS_IOMEM select PHYLIB help This driver supports the 10/100 Ethernet Lite from Xilinx. config XILINX_AXI_EMAC tristate "Xilinx 10/100/1000 AXI Ethernet support" + depends on HAS_IOMEM select PHYLINK help This driver supports the 10/100/1000 Ethernet from Xilinx for the @@ -31,6 +33,7 @@ config XILINX_AXI_EMAC config XILINX_LL_TEMAC tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver" + depends on HAS_IOMEM select PHYLIB help This driver supports the Xilinx 10/100/1000 LocalLink TEMAC diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 030185301014..a1f5f07f4ca9 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -438,7 +438,7 @@ static void temac_do_set_mac_address(struct net_device *ndev) static int temac_init_mac_address(struct net_device *ndev, const void *address) { - ether_addr_copy(ndev->dev_addr, address); + memcpy(ndev->dev_addr, address, ETH_ALEN); if (!is_valid_ether_addr(ndev->dev_addr)) eth_hw_addr_random(ndev); temac_do_set_mac_address(ndev); @@ -1351,7 +1351,7 @@ static int temac_probe(struct platform_device *pdev) struct device_node *temac_np = dev_of_node(&pdev->dev), *dma_np; struct temac_local *lp; struct net_device *ndev; - const void *addr; + u8 addr[ETH_ALEN]; __be32 *p; bool little_endian; int rc = 0; @@ -1542,8 +1542,8 @@ static int temac_probe(struct platform_device *pdev) if (temac_np) { /* Retrieve the MAC address */ - addr = of_get_mac_address(temac_np); - if (IS_ERR(addr)) { + rc = of_get_mac_address(temac_np, addr); + if (rc) { dev_err(&pdev->dev, "could not find MAC address\n"); return -ENODEV; } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 708769349f76..5b4d153b1492 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -508,6 +508,18 @@ static inline u32 axinet_ior_read_mcr(struct axienet_local *lp) return axienet_ior(lp, XAE_MDIO_MCR_OFFSET); } +static inline void axienet_lock_mii(struct axienet_local *lp) +{ + if (lp->mii_bus) + mutex_lock(&lp->mii_bus->mdio_lock); +} + +static inline void axienet_unlock_mii(struct axienet_local *lp) +{ + if (lp->mii_bus) + mutex_unlock(&lp->mii_bus->mdio_lock); +} + /** * axienet_iow - Memory mapped Axi Ethernet register write * @lp: Pointer to axienet local structure diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 92cf9051d557..b508c9453f40 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1053,9 +1053,9 @@ static int axienet_open(struct net_device *ndev) * including the MDIO. MDIO must be disabled before resetting. * Hold MDIO bus lock to avoid MDIO accesses during the reset. */ - mutex_lock(&lp->mii_bus->mdio_lock); + axienet_lock_mii(lp); ret = axienet_device_reset(ndev); - mutex_unlock(&lp->mii_bus->mdio_lock); + axienet_unlock_mii(lp); ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); if (ret) { @@ -1148,9 +1148,9 @@ static int axienet_stop(struct net_device *ndev) } /* Do a reset to ensure DMA is really stopped */ - mutex_lock(&lp->mii_bus->mdio_lock); + axienet_lock_mii(lp); __axienet_device_reset(lp); - mutex_unlock(&lp->mii_bus->mdio_lock); + axienet_unlock_mii(lp); cancel_work_sync(&lp->dma_err_task); @@ -1709,9 +1709,9 @@ static void axienet_dma_err_handler(struct work_struct *work) * including the MDIO. MDIO must be disabled before resetting. * Hold MDIO bus lock to avoid MDIO accesses during the reset. */ - mutex_lock(&lp->mii_bus->mdio_lock); + axienet_lock_mii(lp); __axienet_device_reset(lp); - mutex_unlock(&lp->mii_bus->mdio_lock); + axienet_unlock_mii(lp); for (i = 0; i < lp->tx_bd_num; i++) { cur_p = &lp->tx_bd_v[i]; @@ -1835,8 +1835,8 @@ static int axienet_probe(struct platform_device *pdev) struct device_node *np; struct axienet_local *lp; struct net_device *ndev; - const void *mac_addr; struct resource *ethres; + u8 mac_addr[ETH_ALEN]; int addr_width = 32; u32 value; @@ -2062,13 +2062,14 @@ static int axienet_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Ethernet core IRQ not defined\n"); /* Retrieve the MAC address */ - mac_addr = of_get_mac_address(pdev->dev.of_node); - if (IS_ERR(mac_addr)) { - dev_warn(&pdev->dev, "could not find MAC address property: %ld\n", - PTR_ERR(mac_addr)); - mac_addr = NULL; + ret = of_get_mac_address(pdev->dev.of_node, mac_addr); + if (!ret) { + axienet_set_mac_address(ndev, mac_addr); + } else { + dev_warn(&pdev->dev, "could not find MAC address property: %d\n", + ret); + axienet_set_mac_address(ndev, NULL); } - axienet_set_mac_address(ndev, mac_addr); lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 007840d4a807..d9d58a7dabee 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1115,7 +1115,6 @@ static int xemaclite_of_probe(struct platform_device *ofdev) struct net_device *ndev = NULL; struct net_local *lp = NULL; struct device *dev = &ofdev->dev; - const void *mac_address; int rc = 0; @@ -1157,12 +1156,9 @@ static int xemaclite_of_probe(struct platform_device *ofdev) lp->next_rx_buf_to_use = 0x0; lp->tx_ping_pong = get_bool(ofdev, "xlnx,tx-ping-pong"); lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong"); - mac_address = of_get_mac_address(ofdev->dev.of_node); - if (!IS_ERR(mac_address)) { - /* Set the MAC address. */ - ether_addr_copy(ndev->dev_addr, mac_address); - } else { + rc = of_get_mac_address(ofdev->dev.of_node, ndev->dev_addr); + if (rc) { dev_warn(dev, "No MAC address found, using random\n"); eth_hw_addr_random(ndev); } |