diff options
Diffstat (limited to 'drivers/net/ethernet/intel')
83 files changed, 5999 insertions, 864 deletions
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index c1d155690341..82744a7501c7 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -241,6 +241,7 @@ config I40E tristate "Intel(R) Ethernet Controller XL710 Family support" imply PTP_1588_CLOCK depends on PCI + select AUXILIARY_BUS help This driver supports Intel(R) Ethernet Controller XL710 Family of devices. For more information on how to identify your adapter, go @@ -294,9 +295,11 @@ config ICE tristate "Intel(R) Ethernet Connection E800 Series Support" default n depends on PCI_MSI + select AUXILIARY_BUS select DIMLIB select NET_DEVLINK select PLDMFW + imply PTP_1588_CLOCK help This driver supports Intel(R) Ethernet Connection E800 Series of devices. For more information on how to identify your adapter, go diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index f8d78af76d7d..1b0958bd24f6 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1395,7 +1395,7 @@ static int e100_phy_check_without_mii(struct nic *nic) u8 phy_type; int without_mii; - phy_type = (nic->eeprom[eeprom_phy_iface] >> 8) & 0x0f; + phy_type = (le16_to_cpu(nic->eeprom[eeprom_phy_iface]) >> 8) & 0x0f; switch (phy_type) { case NoSuchPhy: /* Non-MII PHY; UNTESTED! */ @@ -1515,7 +1515,7 @@ static int e100_phy_init(struct nic *nic) mdio_write(netdev, nic->mii.phy_id, MII_BMCR, bmcr); } else if ((nic->mac >= mac_82550_D102) || ((nic->flags & ich) && (mdio_read(netdev, nic->mii.phy_id, MII_TPISTATUS) & 0x8000) && - (nic->eeprom[eeprom_cnfg_mdix] & eeprom_mdix_enabled))) { + (le16_to_cpu(nic->eeprom[eeprom_cnfg_mdix]) & eeprom_mdix_enabled))) { /* enable/disable MDI/MDI-X auto-switching. */ mdio_write(netdev, nic->mii.phy_id, MII_NCONFIG, nic->mii.force_media ? 0 : NCONFIG_AUTO_SWITCH); @@ -2269,9 +2269,9 @@ static int e100_asf(struct nic *nic) { /* ASF can be enabled from eeprom */ return (nic->pdev->device >= 0x1050) && (nic->pdev->device <= 0x1057) && - (nic->eeprom[eeprom_config_asf] & eeprom_asf) && - !(nic->eeprom[eeprom_config_asf] & eeprom_gcl) && - ((nic->eeprom[eeprom_smbus_addr] & 0xFF) != 0xFE); + (le16_to_cpu(nic->eeprom[eeprom_config_asf]) & eeprom_asf) && + !(le16_to_cpu(nic->eeprom[eeprom_config_asf]) & eeprom_gcl) && + ((le16_to_cpu(nic->eeprom[eeprom_smbus_addr]) & 0xFF) != 0xFE); } static int e100_up(struct nic *nic) @@ -2926,7 +2926,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Wol magic packet can be enabled from eeprom */ if ((nic->mac >= mac_82558_D101_A4) && - (nic->eeprom[eeprom_id] & eeprom_id_wol)) { + (le16_to_cpu(nic->eeprom[eeprom_id]) & eeprom_id_wol)) { nic->flags |= wol_magic; device_set_wakeup_enable(&pdev->dev, true); } diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index f976e9daa3d8..3c51ee94fa00 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -513,7 +513,7 @@ static int e1000_set_eeprom(struct net_device *netdev, memcpy(ptr, bytes, eeprom->len); for (i = 0; i < last_word - first_word + 1; i++) - eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]); + cpu_to_le16s(&eeprom_buff[i]); ret_val = e1000_write_eeprom(hw, first_word, last_word - first_word + 1, eeprom_buff); diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 19cf36360933..1042e79a1397 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -2522,7 +2522,7 @@ s32 e1000_check_for_link(struct e1000_hw *hw) * turn it on. For compatibility with a TBI link * partner, we will store bad packets. Some * frames have an additional byte on the end and - * will look like CRC errors to to the hardware. + * will look like CRC errors to the hardware. */ if (!hw->tbi_compatibility_on) { hw->tbi_compatibility_on = true; @@ -2723,7 +2723,7 @@ static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, u32 data, u16 count) * e1000_shift_in_mdi_bits - Shifts data bits in from the PHY * @hw: Struct containing variables accessed by shared code * - * Bits are shifted in in MSB to LSB order. + * Bits are shifted in MSB to LSB order. */ static u16 e1000_shift_in_mdi_bits(struct e1000_hw *hw) { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 042de276e632..c2a109126c27 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -5245,7 +5245,7 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags)) pci_disable_device(pdev); - /* Request a slot slot reset. */ + /* Request a slot reset. */ return PCI_ERS_RESULT_NEED_RESET; } diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 590ad110d383..cf7b3887da1d 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -4639,7 +4639,7 @@ static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw) * @hw: pointer to the HW structure * * ICH8 use the PCI Express bus, but does not contain a PCI Express Capability - * register, so the the bus width is hard coded. + * register, so the bus width is hard coded. **/ static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw) { diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 88e9035b75cf..d150dade06cf 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5223,18 +5223,20 @@ static void e1000_watchdog_task(struct work_struct *work) pm_runtime_resume(netdev->dev.parent); /* Checking if MAC is in DMoff state*/ - pcim_state = er32(STATUS); - while (pcim_state & E1000_STATUS_PCIM_STATE) { - if (tries++ == dmoff_exit_timeout) { - e_dbg("Error in exiting dmoff\n"); - break; - } - usleep_range(10000, 20000); + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { pcim_state = er32(STATUS); - - /* Checking if MAC exited DMoff state */ - if (!(pcim_state & E1000_STATUS_PCIM_STATE)) - e1000_phy_hw_reset(&adapter->hw); + while (pcim_state & E1000_STATUS_PCIM_STATE) { + if (tries++ == dmoff_exit_timeout) { + e_dbg("Error in exiting dmoff\n"); + break; + } + usleep_range(10000, 20000); + pcim_state = er32(STATUS); + + /* Checking if MAC exited DMoff state */ + if (!(pcim_state & E1000_STATUS_PCIM_STATE)) + e1000_phy_hw_reset(&adapter->hw); + } } /* update snapshot of PHY registers on LSC */ @@ -7118,7 +7120,7 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, pci_disable_device(pdev); - /* Request a slot slot reset. */ + /* Request a slot reset. */ return PCI_ERS_RESULT_NEED_RESET; } diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 1db35b2c7750..0f0efee5fc8e 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -2978,7 +2978,7 @@ static u32 e1000_get_phy_addr_for_hv_page(u32 page) * @data: pointer to the data to be read or written * @read: determines if operation is read or write * - * Reads the PHY register at offset and stores the retreived information + * Reads the PHY register at offset and stores the retrieved information * in data. Assumes semaphore already acquired. Note that the procedure * to access these regs uses the address port and data port to read/write. * These accesses done with PHY address 2 and without using pages. diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 9e3103fae723..dbcae92bb18d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1370,7 +1370,6 @@ static irqreturn_t fm10k_msix_mbx_pf(int __always_unused irq, void *data) struct fm10k_hw *hw = &interface->hw; struct fm10k_mbx_info *mbx = &hw->mbx; u32 eicr; - s32 err = 0; /* unmask any set bits related to this interrupt */ eicr = fm10k_read_reg(hw, FM10K_EICR); @@ -1386,15 +1385,16 @@ static irqreturn_t fm10k_msix_mbx_pf(int __always_unused irq, void *data) /* service mailboxes */ if (fm10k_mbx_trylock(interface)) { - err = mbx->ops.process(hw, mbx); + s32 err = mbx->ops.process(hw, mbx); + + if (err == FM10K_ERR_RESET_REQUESTED) + set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags); + /* handle VFLRE events */ fm10k_iov_event(interface); fm10k_mbx_unlock(interface); } - if (err == FM10K_ERR_RESET_REQUESTED) - set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags); - /* if switch toggled state we should reset GLORTs */ if (eicr & FM10K_EICR_SWITCHNOTREADY) { /* force link down for at least 4 seconds */ diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 85d3dd3a3339..b9417dc0007c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -870,6 +870,8 @@ struct i40e_netdev_priv { struct i40e_vsi *vsi; }; +extern struct ida i40e_client_ida; + /* struct that defines an interrupt vector */ struct i40e_q_vector { struct i40e_vsi *vsi; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 32f3facbed1a..e07ed065d3a4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -12,6 +12,7 @@ static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR; static struct i40e_client *registered_client; static LIST_HEAD(i40e_devices); static DEFINE_MUTEX(i40e_device_mutex); +DEFINE_IDA(i40e_client_ida); static int i40e_client_virtchnl_send(struct i40e_info *ldev, struct i40e_client *client, @@ -275,6 +276,57 @@ void i40e_client_update_msix_info(struct i40e_pf *pf) cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector]; } +static void i40e_auxiliary_dev_release(struct device *dev) +{ + struct i40e_auxiliary_device *i40e_aux_dev = + container_of(dev, struct i40e_auxiliary_device, aux_dev.dev); + + ida_free(&i40e_client_ida, i40e_aux_dev->aux_dev.id); + kfree(i40e_aux_dev); +} + +static int i40e_register_auxiliary_dev(struct i40e_info *ldev, const char *name) +{ + struct i40e_auxiliary_device *i40e_aux_dev; + struct pci_dev *pdev = ldev->pcidev; + struct auxiliary_device *aux_dev; + int ret; + + i40e_aux_dev = kzalloc(sizeof(*i40e_aux_dev), GFP_KERNEL); + if (!i40e_aux_dev) + return -ENOMEM; + + i40e_aux_dev->ldev = ldev; + + aux_dev = &i40e_aux_dev->aux_dev; + aux_dev->name = name; + aux_dev->dev.parent = &pdev->dev; + aux_dev->dev.release = i40e_auxiliary_dev_release; + ldev->aux_dev = aux_dev; + + ret = ida_alloc(&i40e_client_ida, GFP_KERNEL); + if (ret < 0) { + kfree(i40e_aux_dev); + return ret; + } + aux_dev->id = ret; + + ret = auxiliary_device_init(aux_dev); + if (ret < 0) { + ida_free(&i40e_client_ida, aux_dev->id); + kfree(i40e_aux_dev); + return ret; + } + + ret = auxiliary_device_add(aux_dev); + if (ret) { + auxiliary_device_uninit(aux_dev); + return ret; + } + + return ret; +} + /** * i40e_client_add_instance - add a client instance struct to the instance list * @pf: pointer to the board struct @@ -286,9 +338,6 @@ static void i40e_client_add_instance(struct i40e_pf *pf) struct netdev_hw_addr *mac = NULL; struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; - if (!registered_client || pf->cinst) - return; - cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); if (!cdev) return; @@ -308,11 +357,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf) cdev->lan_info.fw_build = pf->hw.aq.fw_build; set_bit(__I40E_CLIENT_INSTANCE_NONE, &cdev->state); - if (i40e_client_get_params(vsi, &cdev->lan_info.params)) { - kfree(cdev); - cdev = NULL; - return; - } + if (i40e_client_get_params(vsi, &cdev->lan_info.params)) + goto free_cdev; mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list, struct netdev_hw_addr, list); @@ -324,7 +370,17 @@ static void i40e_client_add_instance(struct i40e_pf *pf) cdev->client = registered_client; pf->cinst = cdev; - i40e_client_update_msix_info(pf); + cdev->lan_info.msix_count = pf->num_iwarp_msix; + cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector]; + + if (i40e_register_auxiliary_dev(&cdev->lan_info, "iwarp")) + goto free_cdev; + + return; + +free_cdev: + kfree(cdev); + pf->cinst = NULL; } /** @@ -345,7 +401,7 @@ void i40e_client_del_instance(struct i40e_pf *pf) **/ void i40e_client_subtask(struct i40e_pf *pf) { - struct i40e_client *client = registered_client; + struct i40e_client *client; struct i40e_client_instance *cdev; struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; int ret = 0; @@ -359,9 +415,11 @@ void i40e_client_subtask(struct i40e_pf *pf) test_bit(__I40E_CONFIG_BUSY, pf->state)) return; - if (!client || !cdev) + if (!cdev || !cdev->client) return; + client = cdev->client; + /* Here we handle client opens. If the client is down, and * the netdev is registered, then open the client. */ @@ -423,16 +481,8 @@ int i40e_lan_add_device(struct i40e_pf *pf) pf->hw.pf_id, pf->hw.bus.bus_id, pf->hw.bus.device, pf->hw.bus.func); - /* If a client has already been registered, we need to add an instance - * of it to our new LAN device. - */ - if (registered_client) - i40e_client_add_instance(pf); + i40e_client_add_instance(pf); - /* Since in some cases register may have happened before a device gets - * added, we can schedule a subtask to go initiate the clients if - * they can be launched at probe time. - */ set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); i40e_service_event_schedule(pf); @@ -449,9 +499,13 @@ out: **/ int i40e_lan_del_device(struct i40e_pf *pf) { + struct auxiliary_device *aux_dev = pf->cinst->lan_info.aux_dev; struct i40e_device *ldev, *tmp; int ret = -ENODEV; + auxiliary_device_delete(aux_dev); + auxiliary_device_uninit(aux_dev); + /* First, remove any client instance. */ i40e_client_del_instance(pf); @@ -579,7 +633,7 @@ static int i40e_client_setup_qvlist(struct i40e_info *ldev, u32 v_idx, i, reg_idx, reg; ldev->qvlist_info = kzalloc(struct_size(ldev->qvlist_info, qv_info, - qvlist_info->num_vectors - 1), GFP_KERNEL); + qvlist_info->num_vectors), GFP_KERNEL); if (!ldev->qvlist_info) return -ENOMEM; ldev->qvlist_info->num_vectors = qvlist_info->num_vectors; @@ -732,6 +786,42 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev, return err; } +void i40e_client_device_register(struct i40e_info *ldev, struct i40e_client *client) +{ + struct i40e_pf *pf = ldev->pf; + + pf->cinst->client = client; + set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); + i40e_service_event_schedule(pf); +} +EXPORT_SYMBOL_GPL(i40e_client_device_register); + +void i40e_client_device_unregister(struct i40e_info *ldev) +{ + struct i40e_pf *pf = ldev->pf; + struct i40e_client_instance *cdev = pf->cinst; + + if (!cdev) + return; + + while (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state)) + usleep_range(500, 1000); + + if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { + cdev->client->ops->close(&cdev->lan_info, cdev->client, false); + clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); + i40e_client_release_qvlist(&cdev->lan_info); + } + + pf->cinst->client = NULL; + clear_bit(__I40E_SERVICE_SCHED, pf->state); +} +EXPORT_SYMBOL_GPL(i40e_client_device_unregister); + +/* Retain these legacy global registration/unregistration calls till i40iw is + * removed from the kernel. The irdma unified driver does not use these + * exported symbols. + */ /** * i40e_register_client - Register a i40e client driver with the L2 driver * @client: pointer to the i40e_client struct diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 67cb0b47416a..b4d3fed0d2f2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -552,9 +552,9 @@ i40e_status i40e_aq_set_rss_key(struct i40e_hw *hw, * ENDIF */ -/* macro to make the table lines short */ +/* macro to make the table lines short, use explicit indexing with [PTYPE] */ #define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - { PTYPE, \ + [PTYPE] = { \ 1, \ I40E_RX_PTYPE_OUTER_##OUTER_IP, \ I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \ @@ -565,16 +565,15 @@ i40e_status i40e_aq_set_rss_key(struct i40e_hw *hw, I40E_RX_PTYPE_INNER_PROT_##I, \ I40E_RX_PTYPE_PAYLOAD_LAYER_##PL } -#define I40E_PTT_UNUSED_ENTRY(PTYPE) \ - { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +#define I40E_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* shorter macros makes the table fit but are terse */ #define I40E_RX_PTYPE_NOF I40E_RX_PTYPE_NOT_FRAG #define I40E_RX_PTYPE_FRG I40E_RX_PTYPE_FRAG #define I40E_RX_PTYPE_INNER_PROT_TS I40E_RX_PTYPE_INNER_PROT_TIMESYNC -/* Lookup table mapping the HW PTYPE to the bit field for decoding */ -struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = { +/* Lookup table mapping in the 8-bit HW PTYPE to the bit field for decoding */ +struct i40e_rx_ptype_decoded i40e_ptype_lookup[BIT(8)] = { /* L2 Packet types */ I40E_PTT_UNUSED_ENTRY(0), I40E_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), @@ -780,118 +779,7 @@ struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = { I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), /* unused entries */ - I40E_PTT_UNUSED_ENTRY(154), - I40E_PTT_UNUSED_ENTRY(155), - I40E_PTT_UNUSED_ENTRY(156), - I40E_PTT_UNUSED_ENTRY(157), - I40E_PTT_UNUSED_ENTRY(158), - I40E_PTT_UNUSED_ENTRY(159), - - I40E_PTT_UNUSED_ENTRY(160), - I40E_PTT_UNUSED_ENTRY(161), - I40E_PTT_UNUSED_ENTRY(162), - I40E_PTT_UNUSED_ENTRY(163), - I40E_PTT_UNUSED_ENTRY(164), - I40E_PTT_UNUSED_ENTRY(165), - I40E_PTT_UNUSED_ENTRY(166), - I40E_PTT_UNUSED_ENTRY(167), - I40E_PTT_UNUSED_ENTRY(168), - I40E_PTT_UNUSED_ENTRY(169), - - I40E_PTT_UNUSED_ENTRY(170), - I40E_PTT_UNUSED_ENTRY(171), - I40E_PTT_UNUSED_ENTRY(172), - I40E_PTT_UNUSED_ENTRY(173), - I40E_PTT_UNUSED_ENTRY(174), - I40E_PTT_UNUSED_ENTRY(175), - I40E_PTT_UNUSED_ENTRY(176), - I40E_PTT_UNUSED_ENTRY(177), - I40E_PTT_UNUSED_ENTRY(178), - I40E_PTT_UNUSED_ENTRY(179), - - I40E_PTT_UNUSED_ENTRY(180), - I40E_PTT_UNUSED_ENTRY(181), - I40E_PTT_UNUSED_ENTRY(182), - I40E_PTT_UNUSED_ENTRY(183), - I40E_PTT_UNUSED_ENTRY(184), - I40E_PTT_UNUSED_ENTRY(185), - I40E_PTT_UNUSED_ENTRY(186), - I40E_PTT_UNUSED_ENTRY(187), - I40E_PTT_UNUSED_ENTRY(188), - I40E_PTT_UNUSED_ENTRY(189), - - I40E_PTT_UNUSED_ENTRY(190), - I40E_PTT_UNUSED_ENTRY(191), - I40E_PTT_UNUSED_ENTRY(192), - I40E_PTT_UNUSED_ENTRY(193), - I40E_PTT_UNUSED_ENTRY(194), - I40E_PTT_UNUSED_ENTRY(195), - I40E_PTT_UNUSED_ENTRY(196), - I40E_PTT_UNUSED_ENTRY(197), - I40E_PTT_UNUSED_ENTRY(198), - I40E_PTT_UNUSED_ENTRY(199), - - I40E_PTT_UNUSED_ENTRY(200), - I40E_PTT_UNUSED_ENTRY(201), - I40E_PTT_UNUSED_ENTRY(202), - I40E_PTT_UNUSED_ENTRY(203), - I40E_PTT_UNUSED_ENTRY(204), - I40E_PTT_UNUSED_ENTRY(205), - I40E_PTT_UNUSED_ENTRY(206), - I40E_PTT_UNUSED_ENTRY(207), - I40E_PTT_UNUSED_ENTRY(208), - I40E_PTT_UNUSED_ENTRY(209), - - I40E_PTT_UNUSED_ENTRY(210), - I40E_PTT_UNUSED_ENTRY(211), - I40E_PTT_UNUSED_ENTRY(212), - I40E_PTT_UNUSED_ENTRY(213), - I40E_PTT_UNUSED_ENTRY(214), - I40E_PTT_UNUSED_ENTRY(215), - I40E_PTT_UNUSED_ENTRY(216), - I40E_PTT_UNUSED_ENTRY(217), - I40E_PTT_UNUSED_ENTRY(218), - I40E_PTT_UNUSED_ENTRY(219), - - I40E_PTT_UNUSED_ENTRY(220), - I40E_PTT_UNUSED_ENTRY(221), - I40E_PTT_UNUSED_ENTRY(222), - I40E_PTT_UNUSED_ENTRY(223), - I40E_PTT_UNUSED_ENTRY(224), - I40E_PTT_UNUSED_ENTRY(225), - I40E_PTT_UNUSED_ENTRY(226), - I40E_PTT_UNUSED_ENTRY(227), - I40E_PTT_UNUSED_ENTRY(228), - I40E_PTT_UNUSED_ENTRY(229), - - I40E_PTT_UNUSED_ENTRY(230), - I40E_PTT_UNUSED_ENTRY(231), - I40E_PTT_UNUSED_ENTRY(232), - I40E_PTT_UNUSED_ENTRY(233), - I40E_PTT_UNUSED_ENTRY(234), - I40E_PTT_UNUSED_ENTRY(235), - I40E_PTT_UNUSED_ENTRY(236), - I40E_PTT_UNUSED_ENTRY(237), - I40E_PTT_UNUSED_ENTRY(238), - I40E_PTT_UNUSED_ENTRY(239), - - I40E_PTT_UNUSED_ENTRY(240), - I40E_PTT_UNUSED_ENTRY(241), - I40E_PTT_UNUSED_ENTRY(242), - I40E_PTT_UNUSED_ENTRY(243), - I40E_PTT_UNUSED_ENTRY(244), - I40E_PTT_UNUSED_ENTRY(245), - I40E_PTT_UNUSED_ENTRY(246), - I40E_PTT_UNUSED_ENTRY(247), - I40E_PTT_UNUSED_ENTRY(248), - I40E_PTT_UNUSED_ENTRY(249), - - I40E_PTT_UNUSED_ENTRY(250), - I40E_PTT_UNUSED_ENTRY(251), - I40E_PTT_UNUSED_ENTRY(252), - I40E_PTT_UNUSED_ENTRY(253), - I40E_PTT_UNUSED_ENTRY(254), - I40E_PTT_UNUSED_ENTRY(255) + [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index ccd5b9486ea9..3e822bad4851 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1262,8 +1262,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, if (ethtool_link_ksettings_test_link_mode(&safe_ks, supported, Autoneg) && - hw->phy.link_info.phy_type != - I40E_PHY_TYPE_10GBASE_T) { + hw->phy.media_type != I40E_MEDIA_TYPE_BASET) { netdev_info(netdev, "Autoneg cannot be disabled on this phy\n"); err = -EINVAL; goto done; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 704e474879c5..861e59a350bd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -32,7 +32,7 @@ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi); static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired); static int i40e_add_vsi(struct i40e_vsi *vsi); static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi); -static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); +static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); @@ -8703,6 +8703,8 @@ int i40e_vsi_open(struct i40e_vsi *vsi) dev_driver_string(&pf->pdev->dev), dev_name(&pf->pdev->dev)); err = i40e_vsi_request_irq(vsi, int_name); + if (err) + goto err_setup_rx; } else { err = -EINVAL; @@ -10569,7 +10571,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) #endif /* CONFIG_I40E_DCB */ if (!lock_acquired) rtnl_lock(); - ret = i40e_setup_pf_switch(pf, reinit); + ret = i40e_setup_pf_switch(pf, reinit, true); if (ret) goto end_unlock; @@ -14627,10 +14629,11 @@ int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig) * i40e_setup_pf_switch - Setup the HW switch on startup or after reset * @pf: board private structure * @reinit: if the Main VSI needs to re-initialized. + * @lock_acquired: indicates whether or not the lock has been acquired * * Returns 0 on success, negative value on failure **/ -static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) +static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired) { u16 flags = 0; int ret; @@ -14732,9 +14735,15 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) i40e_ptp_init(pf); + if (!lock_acquired) + rtnl_lock(); + /* repopulate tunnel port filters */ udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev); + if (!lock_acquired) + rtnl_unlock(); + return ret; } @@ -15528,7 +15537,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; } #endif - err = i40e_setup_pf_switch(pf, false); + err = i40e_setup_pf_switch(pf, false, false); if (err) { dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); goto err_vsis; @@ -16270,6 +16279,7 @@ static void __exit i40e_exit_module(void) { pci_unregister_driver(&i40e_driver); destroy_workqueue(i40e_wq); + ida_destroy(&i40e_client_ida); i40e_dbg_exit(); } module_exit(i40e_exit_module); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index f1f6fc3744e9..7b971b205d36 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -11,13 +11,14 @@ * operate with the nanosecond field directly without fear of overflow. * * Much like the 82599, the update period is dependent upon the link speed: - * At 40Gb link or no link, the period is 1.6ns. - * At 10Gb link, the period is multiplied by 2. (3.2ns) + * At 40Gb, 25Gb, or no link, the period is 1.6ns. + * At 10Gb or 5Gb link, the period is multiplied by 2. (3.2ns) * At 1Gb link, the period is multiplied by 20. (32ns) * 1588 functionality is not supported at 100Mbps. */ #define I40E_PTP_40GB_INCVAL 0x0199999999ULL #define I40E_PTP_10GB_INCVAL_MULT 2 +#define I40E_PTP_5GB_INCVAL_MULT 2 #define I40E_PTP_1GB_INCVAL_MULT 20 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V1 BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT) @@ -465,6 +466,9 @@ void i40e_ptp_set_increment(struct i40e_pf *pf) case I40E_LINK_SPEED_10GB: mult = I40E_PTP_10GB_INCVAL_MULT; break; + case I40E_LINK_SPEED_5GB: + mult = I40E_PTP_5GB_INCVAL_MULT; + break; case I40E_LINK_SPEED_1GB: mult = I40E_PTP_1GB_INCVAL_MULT; break; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b883ab809df3..38eb8151ee9a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2298,7 +2298,6 @@ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp) struct bpf_prog *xdp_prog; u32 act; - rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (!xdp_prog) @@ -2334,7 +2333,6 @@ out_failure: break; } xdp_out: - rcu_read_unlock(); return result; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index c81109a63e90..36a4ca1ffb1a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -804,7 +804,6 @@ enum i40e_rx_l2_ptype { }; struct i40e_rx_ptype_decoded { - u32 ptype:8; u32 known:1; u32 outer_ip:1; u32 outer_ip_ver:1; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 68f177a86403..e7e778ca074c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -153,7 +153,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) struct bpf_prog *xdp_prog; u32 act; - rcu_read_lock(); /* NB! xdp_prog will always be !NULL, due to the fact that * this path is enabled by setting an XDP program. */ @@ -164,7 +163,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (err) goto out_failure; - rcu_read_unlock(); return I40E_XDP_REDIR; } @@ -188,7 +186,6 @@ out_failure: result = I40E_XDP_CONSUMED; break; } - rcu_read_unlock(); return result; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index 8547fc8fdfd6..e9cc7f6ddc46 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -522,9 +522,9 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, * ENDIF */ -/* macro to make the table lines short */ +/* macro to make the table lines short, use explicit indexing with [PTYPE] */ #define IAVF_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - { PTYPE, \ + [PTYPE] = { \ 1, \ IAVF_RX_PTYPE_OUTER_##OUTER_IP, \ IAVF_RX_PTYPE_OUTER_##OUTER_IP_VER, \ @@ -535,16 +535,15 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, IAVF_RX_PTYPE_INNER_PROT_##I, \ IAVF_RX_PTYPE_PAYLOAD_LAYER_##PL } -#define IAVF_PTT_UNUSED_ENTRY(PTYPE) \ - { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +#define IAVF_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* shorter macros makes the table fit but are terse */ #define IAVF_RX_PTYPE_NOF IAVF_RX_PTYPE_NOT_FRAG #define IAVF_RX_PTYPE_FRG IAVF_RX_PTYPE_FRAG #define IAVF_RX_PTYPE_INNER_PROT_TS IAVF_RX_PTYPE_INNER_PROT_TIMESYNC -/* Lookup table mapping the HW PTYPE to the bit field for decoding */ -struct iavf_rx_ptype_decoded iavf_ptype_lookup[] = { +/* Lookup table mapping the 8-bit HW PTYPE to the bit field for decoding */ +struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = { /* L2 Packet types */ IAVF_PTT_UNUSED_ENTRY(0), IAVF_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), @@ -750,118 +749,7 @@ struct iavf_rx_ptype_decoded iavf_ptype_lookup[] = { IAVF_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), /* unused entries */ - IAVF_PTT_UNUSED_ENTRY(154), - IAVF_PTT_UNUSED_ENTRY(155), - IAVF_PTT_UNUSED_ENTRY(156), - IAVF_PTT_UNUSED_ENTRY(157), - IAVF_PTT_UNUSED_ENTRY(158), - IAVF_PTT_UNUSED_ENTRY(159), - - IAVF_PTT_UNUSED_ENTRY(160), - IAVF_PTT_UNUSED_ENTRY(161), - IAVF_PTT_UNUSED_ENTRY(162), - IAVF_PTT_UNUSED_ENTRY(163), - IAVF_PTT_UNUSED_ENTRY(164), - IAVF_PTT_UNUSED_ENTRY(165), - IAVF_PTT_UNUSED_ENTRY(166), - IAVF_PTT_UNUSED_ENTRY(167), - IAVF_PTT_UNUSED_ENTRY(168), - IAVF_PTT_UNUSED_ENTRY(169), - - IAVF_PTT_UNUSED_ENTRY(170), - IAVF_PTT_UNUSED_ENTRY(171), - IAVF_PTT_UNUSED_ENTRY(172), - IAVF_PTT_UNUSED_ENTRY(173), - IAVF_PTT_UNUSED_ENTRY(174), - IAVF_PTT_UNUSED_ENTRY(175), - IAVF_PTT_UNUSED_ENTRY(176), - IAVF_PTT_UNUSED_ENTRY(177), - IAVF_PTT_UNUSED_ENTRY(178), - IAVF_PTT_UNUSED_ENTRY(179), - - IAVF_PTT_UNUSED_ENTRY(180), - IAVF_PTT_UNUSED_ENTRY(181), - IAVF_PTT_UNUSED_ENTRY(182), - IAVF_PTT_UNUSED_ENTRY(183), - IAVF_PTT_UNUSED_ENTRY(184), - IAVF_PTT_UNUSED_ENTRY(185), - IAVF_PTT_UNUSED_ENTRY(186), - IAVF_PTT_UNUSED_ENTRY(187), - IAVF_PTT_UNUSED_ENTRY(188), - IAVF_PTT_UNUSED_ENTRY(189), - - IAVF_PTT_UNUSED_ENTRY(190), - IAVF_PTT_UNUSED_ENTRY(191), - IAVF_PTT_UNUSED_ENTRY(192), - IAVF_PTT_UNUSED_ENTRY(193), - IAVF_PTT_UNUSED_ENTRY(194), - IAVF_PTT_UNUSED_ENTRY(195), - IAVF_PTT_UNUSED_ENTRY(196), - IAVF_PTT_UNUSED_ENTRY(197), - IAVF_PTT_UNUSED_ENTRY(198), - IAVF_PTT_UNUSED_ENTRY(199), - - IAVF_PTT_UNUSED_ENTRY(200), - IAVF_PTT_UNUSED_ENTRY(201), - IAVF_PTT_UNUSED_ENTRY(202), - IAVF_PTT_UNUSED_ENTRY(203), - IAVF_PTT_UNUSED_ENTRY(204), - IAVF_PTT_UNUSED_ENTRY(205), - IAVF_PTT_UNUSED_ENTRY(206), - IAVF_PTT_UNUSED_ENTRY(207), - IAVF_PTT_UNUSED_ENTRY(208), - IAVF_PTT_UNUSED_ENTRY(209), - - IAVF_PTT_UNUSED_ENTRY(210), - IAVF_PTT_UNUSED_ENTRY(211), - IAVF_PTT_UNUSED_ENTRY(212), - IAVF_PTT_UNUSED_ENTRY(213), - IAVF_PTT_UNUSED_ENTRY(214), - IAVF_PTT_UNUSED_ENTRY(215), - IAVF_PTT_UNUSED_ENTRY(216), - IAVF_PTT_UNUSED_ENTRY(217), - IAVF_PTT_UNUSED_ENTRY(218), - IAVF_PTT_UNUSED_ENTRY(219), - - IAVF_PTT_UNUSED_ENTRY(220), - IAVF_PTT_UNUSED_ENTRY(221), - IAVF_PTT_UNUSED_ENTRY(222), - IAVF_PTT_UNUSED_ENTRY(223), - IAVF_PTT_UNUSED_ENTRY(224), - IAVF_PTT_UNUSED_ENTRY(225), - IAVF_PTT_UNUSED_ENTRY(226), - IAVF_PTT_UNUSED_ENTRY(227), - IAVF_PTT_UNUSED_ENTRY(228), - IAVF_PTT_UNUSED_ENTRY(229), - - IAVF_PTT_UNUSED_ENTRY(230), - IAVF_PTT_UNUSED_ENTRY(231), - IAVF_PTT_UNUSED_ENTRY(232), - IAVF_PTT_UNUSED_ENTRY(233), - IAVF_PTT_UNUSED_ENTRY(234), - IAVF_PTT_UNUSED_ENTRY(235), - IAVF_PTT_UNUSED_ENTRY(236), - IAVF_PTT_UNUSED_ENTRY(237), - IAVF_PTT_UNUSED_ENTRY(238), - IAVF_PTT_UNUSED_ENTRY(239), - - IAVF_PTT_UNUSED_ENTRY(240), - IAVF_PTT_UNUSED_ENTRY(241), - IAVF_PTT_UNUSED_ENTRY(242), - IAVF_PTT_UNUSED_ENTRY(243), - IAVF_PTT_UNUSED_ENTRY(244), - IAVF_PTT_UNUSED_ENTRY(245), - IAVF_PTT_UNUSED_ENTRY(246), - IAVF_PTT_UNUSED_ENTRY(247), - IAVF_PTT_UNUSED_ENTRY(248), - IAVF_PTT_UNUSED_ENTRY(249), - - IAVF_PTT_UNUSED_ENTRY(250), - IAVF_PTT_UNUSED_ENTRY(251), - IAVF_PTT_UNUSED_ENTRY(252), - IAVF_PTT_UNUSED_ENTRY(253), - IAVF_PTT_UNUSED_ENTRY(254), - IAVF_PTT_UNUSED_ENTRY(255) + [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h index de9fda78b43a..9f1f523807c4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_type.h +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h @@ -370,7 +370,6 @@ enum iavf_rx_l2_ptype { }; struct iavf_rx_ptype_decoded { - u32 ptype:8; u32 known:1; u32 outer_ip:1; u32 outer_ip_ver:1; diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 07fe857e9e3a..4f538cdf42c1 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -22,12 +22,14 @@ ice-y := ice_main.o \ ice_ethtool_fdir.o \ ice_flex_pipe.o \ ice_flow.o \ + ice_idc.o \ ice_devlink.o \ ice_fw_update.o \ ice_lag.o \ ice_ethtool.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_allowlist.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o +ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2924c67567b8..a450343fbb92 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -34,6 +34,7 @@ #include <linux/if_bridge.h> #include <linux/ctype.h> #include <linux/bpf.h> +#include <linux/auxiliary_bus.h> #include <linux/avf/virtchnl.h> #include <linux/cpu_rmap.h> #include <linux/dim.h> @@ -55,8 +56,10 @@ #include "ice_switch.h" #include "ice_common.h" #include "ice_sched.h" +#include "ice_idc_int.h" #include "ice_virtchnl_pf.h" #include "ice_sriov.h" +#include "ice_ptp.h" #include "ice_fdir.h" #include "ice_xsk.h" #include "ice_arfs.h" @@ -72,12 +75,15 @@ #define ICE_DFLT_TRAFFIC_CLASS BIT(0) #define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) -#define ICE_AQ_LEN 64 +#define ICE_AQ_LEN 192 #define ICE_MBXSQ_LEN 64 +#define ICE_SBQ_LEN 64 #define ICE_MIN_LAN_TXRX_MSIX 1 #define ICE_MIN_LAN_OICR_MSIX 1 #define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX) #define ICE_FDIR_MSIX 2 +#define ICE_RDMA_NUM_AEQ_MSIX 4 +#define ICE_MIN_RDMA_MSIX 2 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 #define ICE_VSI_MAP_SCATTER 1 @@ -88,8 +94,9 @@ #define ICE_MAX_LG_RSS_QS 256 #define ICE_RES_VALID_BIT 0x8000 #define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) +#define ICE_RES_RDMA_VEC_ID (ICE_RES_MISC_VEC_ID - 1) /* All VF control VSIs share the same IRQ, so assign a unique ID for them */ -#define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_MISC_VEC_ID - 1) +#define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_RDMA_VEC_ID - 1) #define ICE_INVAL_Q_INDEX 0xffff #define ICE_INVAL_VFID 256 @@ -203,9 +210,9 @@ enum ice_pf_state { ICE_NEEDS_RESTART, ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ - ICE_PFR_REQ, /* set by driver and peers */ - ICE_CORER_REQ, /* set by driver and peers */ - ICE_GLOBR_REQ, /* set by driver and peers */ + ICE_PFR_REQ, /* set by driver */ + ICE_CORER_REQ, /* set by driver */ + ICE_GLOBR_REQ, /* set by driver */ ICE_CORER_RECV, /* set by OICR handler */ ICE_GLOBR_RECV, /* set by OICR handler */ ICE_EMPR_RECV, /* set by OICR handler */ @@ -222,6 +229,7 @@ enum ice_pf_state { ICE_STATE_NOMINAL_CHECK_BITS, ICE_ADMINQ_EVENT_PENDING, ICE_MAILBOXQ_EVENT_PENDING, + ICE_SIDEBANDQ_EVENT_PENDING, ICE_MDD_EVENT_PENDING, ICE_VFLR_EVENT_PENDING, ICE_FLTR_OVERFLOW_PROMISC, @@ -332,6 +340,7 @@ struct ice_vsi { u16 req_rxq; /* User requested Rx queues */ u16 num_rx_desc; u16 num_tx_desc; + u16 qset_handle[ICE_MAX_TRAFFIC_CLASS]; struct ice_tc_cfg tc_cfg; struct bpf_prog *xdp_prog; struct ice_ring **xdp_rings; /* XDP ring array */ @@ -374,17 +383,22 @@ struct ice_q_vector { enum ice_pf_flags { ICE_FLAG_FLTR_SYNC, + ICE_FLAG_RDMA_ENA, ICE_FLAG_RSS_ENA, ICE_FLAG_SRIOV_ENA, ICE_FLAG_SRIOV_CAPABLE, ICE_FLAG_DCB_CAPABLE, ICE_FLAG_DCB_ENA, ICE_FLAG_FD_ENA, + ICE_FLAG_PTP_SUPPORTED, /* PTP is supported by NVM */ + ICE_FLAG_PTP, /* PTP is enabled by software */ + ICE_FLAG_AUX_ENA, ICE_FLAG_ADV_FEATURES, ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, ICE_FLAG_NO_MEDIA, ICE_FLAG_FW_LLDP_AGENT, + ICE_FLAG_MOD_POWER_UNSUPPORTED, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ ICE_FLAG_LEGACY_RX, ICE_FLAG_VF_TRUE_PROMISC_ENA, @@ -440,12 +454,17 @@ struct ice_pf { struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ struct mutex tc_mutex; /* lock to protect TC changes */ u32 msg_enable; + struct ice_ptp ptp; + u16 num_rdma_msix; /* Total MSIX vectors for RDMA driver */ + u16 rdma_base_vector; /* spinlock to protect the AdminQ wait list */ spinlock_t aq_wait_lock; struct hlist_head aq_wait_list; wait_queue_head_t aq_wait_queue; + wait_queue_head_t reset_wait_queue; + u32 hw_csum_rx_error; u16 oicr_idx; /* Other interrupt cause MSIX vector index */ u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ @@ -472,6 +491,8 @@ struct ice_pf { unsigned long tx_timeout_last_recovery; u32 tx_timeout_recovery_level; char int_name[ICE_INT_NAME_STR_LEN]; + struct auxiliary_device *adev; + int aux_idx; u32 sw_int_count; __le64 nvm_phy_type_lo; /* NVM PHY type low */ @@ -638,6 +659,9 @@ int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); +int ice_plug_aux_dev(struct ice_pf *pf); +void ice_unplug_aux_dev(struct ice_pf *pf); +int ice_init_rdma(struct ice_pf *pf); const char *ice_stat_str(enum ice_status stat_err); const char *ice_aq_str(enum ice_aq_err aq_err); bool ice_is_wol_supported(struct ice_hw *hw); @@ -662,4 +686,25 @@ int ice_open_internal(struct net_device *netdev); int ice_stop(struct net_device *netdev); void ice_service_task_schedule(struct ice_pf *pf); +/** + * ice_set_rdma_cap - enable RDMA support + * @pf: PF struct + */ +static inline void ice_set_rdma_cap(struct ice_pf *pf) +{ + if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) { + set_bit(ICE_FLAG_RDMA_ENA, pf->flags); + ice_plug_aux_dev(pf); + } +} + +/** + * ice_clear_rdma_cap - disable RDMA support + * @pf: PF struct + */ +static inline void ice_clear_rdma_cap(struct ice_pf *pf) +{ + ice_unplug_aux_dev(pf); + clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); +} #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 5cdfe406af84..21b4c7cd6f05 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -108,6 +108,7 @@ struct ice_aqc_list_caps_elem { #define ICE_AQC_CAPS_TXQS 0x0042 #define ICE_AQC_CAPS_MSIX 0x0043 #define ICE_AQC_CAPS_FD 0x0045 +#define ICE_AQC_CAPS_1588 0x0046 #define ICE_AQC_CAPS_MAX_MTU 0x0047 #define ICE_AQC_CAPS_NVM_VER 0x0048 #define ICE_AQC_CAPS_PENDING_NVM_VER 0x0049 @@ -115,6 +116,7 @@ struct ice_aqc_list_caps_elem { #define ICE_AQC_CAPS_PENDING_OROM_VER 0x004B #define ICE_AQC_CAPS_NET_VER 0x004C #define ICE_AQC_CAPS_PENDING_NET_VER 0x004D +#define ICE_AQC_CAPS_RDMA 0x0051 #define ICE_AQC_CAPS_NVM_MGMT 0x0080 u8 major_ver; @@ -1122,7 +1124,9 @@ struct ice_aqc_get_link_status_data { #define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT BIT(5) #define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA BIT(6) #define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA BIT(7) - u8 reserved1; + u8 link_cfg_err; +#define ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED BIT(5) +#define ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT BIT(7) u8 link_info; #define ICE_AQ_LINK_UP BIT(0) /* Link Status */ #define ICE_AQ_LINK_FAULT BIT(1) @@ -1165,7 +1169,7 @@ struct ice_aqc_get_link_status_data { #define ICE_AQ_CFG_PACING_TYPE_FIXED ICE_AQ_CFG_PACING_TYPE_M /* External Device Power Ability */ u8 power_desc; -#define ICE_AQ_PWR_CLASS_M 0x3 +#define ICE_AQ_PWR_CLASS_M 0x3F #define ICE_AQ_LINK_PWR_BASET_LOW_HIGH 0 #define ICE_AQ_LINK_PWR_BASET_HIGH 1 #define ICE_AQ_LINK_PWR_QSFP_CLASS_1 0 @@ -1608,6 +1612,15 @@ struct ice_aqc_get_set_rss_lut { __le32 addr_low; }; +/* Sideband Control Interface Commands */ +/* Neighbor Device Request (indirect 0x0C00); also used for the response. */ +struct ice_aqc_neigh_dev_req { + __le16 sb_data_len; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + /* Add Tx LAN Queues (indirect 0x0C30) */ struct ice_aqc_add_txqs { u8 num_qgrps; @@ -1684,6 +1697,36 @@ struct ice_aqc_dis_txq_item { __le16 q_id[]; } __packed; +/* Add Tx RDMA Queue Set (indirect 0x0C33) */ +struct ice_aqc_add_rdma_qset { + u8 num_qset_grps; + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +/* This is the descriptor of each Qset entry for the Add Tx RDMA Queue Set + * command (0x0C33). Only used within struct ice_aqc_add_rdma_qset. + */ +struct ice_aqc_add_tx_rdma_qset_entry { + __le16 tx_qset_id; + u8 rsvd[2]; + __le32 qset_teid; + struct ice_aqc_txsched_elem info; +}; + +/* The format of the command buffer for Add Tx RDMA Queue Set(0x0C33) + * is an array of the following structs. Please note that the length of + * each struct ice_aqc_add_rdma_qset is variable due to the variable + * number of queues in each group! + */ +struct ice_aqc_add_rdma_qset_data { + __le32 parent_teid; + __le16 num_qsets; + u8 rsvd[2]; + struct ice_aqc_add_tx_rdma_qset_entry rdma_qsets[]; +}; + /* Configure Firmware Logging Command (indirect 0xFF09) * Logging Information Read Response (indirect 0xFF10) * Note: The 0xFF10 command has no input parameters. @@ -1810,6 +1853,30 @@ struct ice_aqc_get_pkg_info_resp { struct ice_aqc_get_pkg_info pkg_info[]; }; +/* Driver Shared Parameters (direct, 0x0C90) */ +struct ice_aqc_driver_shared_params { + u8 set_or_get_op; +#define ICE_AQC_DRIVER_PARAM_OP_MASK BIT(0) +#define ICE_AQC_DRIVER_PARAM_SET 0 +#define ICE_AQC_DRIVER_PARAM_GET 1 + u8 param_indx; +#define ICE_AQC_DRIVER_PARAM_MAX_IDX 15 + u8 rsvd[2]; + __le32 param_val; + __le32 addr_high; + __le32 addr_low; +}; + +enum ice_aqc_driver_params { + /* OS clock index for PTP timer Domain 0 */ + ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0 = 0, + /* OS clock index for PTP timer Domain 1 */ + ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1, + + /* Add new parameters above */ + ICE_AQC_DRIVER_PARAM_MAX = 16, +}; + /* Lan Queue Overflow Event (direct, 0x1001) */ struct ice_aqc_event_lan_overflow { __le32 prtdcb_ruptq; @@ -1878,13 +1945,16 @@ struct ice_aq_desc { struct ice_aqc_lldp_filter_ctrl lldp_filter_ctrl; struct ice_aqc_get_set_rss_lut get_set_rss_lut; struct ice_aqc_get_set_rss_key get_set_rss_key; + struct ice_aqc_neigh_dev_req neigh_dev; struct ice_aqc_add_txqs add_txqs; struct ice_aqc_dis_txqs dis_txqs; + struct ice_aqc_add_rdma_qset add_rdma_qset; struct ice_aqc_add_get_update_free_vsi vsi_cmd; struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res; struct ice_aqc_fw_logging fw_logging; struct ice_aqc_get_clear_fw_log get_clear_fw_log; struct ice_aqc_download_pkg download_pkg; + struct ice_aqc_driver_shared_params drv_shared_params; struct ice_aqc_set_mac_lb set_mac_lb; struct ice_aqc_alloc_free_res_cmd sw_res_ctrl; struct ice_aqc_set_mac_cfg set_mac_cfg; @@ -2025,15 +2095,21 @@ enum ice_adminq_opc { ice_aqc_opc_get_rss_key = 0x0B04, ice_aqc_opc_get_rss_lut = 0x0B05, + /* Sideband Control Interface commands */ + ice_aqc_opc_neighbour_device_request = 0x0C00, + /* Tx queue handling commands/events */ ice_aqc_opc_add_txqs = 0x0C30, ice_aqc_opc_dis_txqs = 0x0C31, + ice_aqc_opc_add_rdma_qset = 0x0C33, /* package commands */ ice_aqc_opc_download_pkg = 0x0C40, ice_aqc_opc_update_pkg = 0x0C42, ice_aqc_opc_get_pkg_info_list = 0x0C43, + ice_aqc_opc_driver_shared_params = 0x0C90, + /* Standalone Commands/Events */ ice_aqc_opc_event_lan_overflow = 0x1001, diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.h b/drivers/net/ethernet/intel/ice/ice_arfs.h index f39cd16403ed..80ed76f0cace 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.h +++ b/drivers/net/ethernet/intel/ice/ice_arfs.h @@ -52,12 +52,12 @@ bool ice_is_arfs_using_perfect_flow(struct ice_hw *hw, enum ice_fltr_ptype flow_type); #else -#define ice_sync_arfs_fltrs(pf) do {} while (0) -#define ice_init_arfs(vsi) do {} while (0) -#define ice_clear_arfs(vsi) do {} while (0) -#define ice_remove_arfs(pf) do {} while (0) -#define ice_free_cpu_rx_rmap(vsi) do {} while (0) -#define ice_rebuild_arfs(pf) do {} while (0) +static inline void ice_clear_arfs(struct ice_vsi *vsi) { } +static inline void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) { } +static inline void ice_init_arfs(struct ice_vsi *vsi) { } +static inline void ice_sync_arfs_fltrs(struct ice_pf *pf) { } +static inline void ice_remove_arfs(struct ice_pf *pf) { } +static inline void ice_rebuild_arfs(struct ice_pf *pf) { } static inline int ice_set_cpu_rx_rmap(struct ice_vsi __always_unused *vsi) { diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 5985a7e5ca8a..c36057efc7ae 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -287,6 +287,15 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) /* make sure the context is associated with the right VSI */ tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); + /* Restrict Tx timestamps to the PF VSI */ + switch (vsi->type) { + case ICE_VSI_PF: + tlan_ctx->tsyn_ena = 1; + break; + default: + break; + } + tlan_ctx->tso_ena = ICE_TX_LEGACY; tlan_ctx->tso_qnum = pf_q; @@ -319,11 +328,9 @@ static unsigned int ice_rx_offset(struct ice_ring *rx_ring) * * Configure the Rx descriptor ring in RLAN context. */ -int ice_setup_rx_ctx(struct ice_ring *ring) +static int ice_setup_rx_ctx(struct ice_ring *ring) { - struct device *dev = ice_pf_to_dev(ring->vsi->back); int chain_len = ICE_MAX_CHAINED_RX_BUFS; - u16 num_bufs = ICE_DESC_UNUSED(ring); struct ice_vsi *vsi = ring->vsi; u32 rxdid = ICE_RXDID_FLEX_NIC; struct ice_rlan_ctx rlan_ctx; @@ -339,48 +346,6 @@ int ice_setup_rx_ctx(struct ice_ring *ring) /* clear the context structure first */ memset(&rlan_ctx, 0, sizeof(rlan_ctx)); - ring->rx_buf_len = vsi->rx_buf_len; - - if (ring->vsi->type == ICE_VSI_PF) { - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) - /* coverity[check_return] */ - xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, ring->q_vector->napi.napi_id); - - ring->xsk_pool = ice_xsk_pool(ring); - if (ring->xsk_pool) { - xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); - - ring->rx_buf_len = - xsk_pool_get_rx_frame_size(ring->xsk_pool); - /* For AF_XDP ZC, we disallow packets to span on - * multiple buffers, thus letting us skip that - * handling in the fast-path. - */ - chain_len = 1; - err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, - MEM_TYPE_XSK_BUFF_POOL, - NULL); - if (err) - return err; - xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); - - dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", - ring->q_index); - } else { - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) - /* coverity[check_return] */ - xdp_rxq_info_reg(&ring->xdp_rxq, - ring->netdev, - ring->q_index, ring->q_vector->napi.napi_id); - - err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, - MEM_TYPE_PAGE_SHARED, - NULL); - if (err) - return err; - } - } /* Receive Queue Base Address. * Indicates the starting address of the descriptor queue defined in * 128 Byte units. @@ -415,6 +380,12 @@ int ice_setup_rx_ctx(struct ice_ring *ring) */ rlan_ctx.showiv = 0; + /* For AF_XDP ZC, we disallow packets to span on + * multiple buffers, thus letting us skip that + * handling in the fast-path. + */ + if (ring->xsk_pool) + chain_len = 1; /* Max packet size for this queue - must not be set to a larger value * than 5 x DBUF */ @@ -431,14 +402,15 @@ int ice_setup_rx_ctx(struct ice_ring *ring) * of same priority */ if (vsi->type != ICE_VSI_VF) - ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3); + ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true); else - ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3); + ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3, + false); /* Absolute queue number out of 2K needs to be passed */ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); if (err) { - dev_err(dev, "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", + dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", pf_q, err); return -EIO; } @@ -458,6 +430,66 @@ int ice_setup_rx_ctx(struct ice_ring *ring) ring->tail = hw->hw_addr + QRX_TAIL(pf_q); writel(0, ring->tail); + return 0; +} + +/** + * ice_vsi_cfg_rxq - Configure an Rx queue + * @ring: the ring being configured + * + * Return 0 on success and a negative value on error. + */ +int ice_vsi_cfg_rxq(struct ice_ring *ring) +{ + struct device *dev = ice_pf_to_dev(ring->vsi->back); + u16 num_bufs = ICE_DESC_UNUSED(ring); + int err; + + ring->rx_buf_len = ring->vsi->rx_buf_len; + + if (ring->vsi->type == ICE_VSI_PF) { + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + /* coverity[check_return] */ + xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, ring->q_vector->napi.napi_id); + + ring->xsk_pool = ice_xsk_pool(ring); + if (ring->xsk_pool) { + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + + ring->rx_buf_len = + xsk_pool_get_rx_frame_size(ring->xsk_pool); + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL); + if (err) + return err; + xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); + + dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", + ring->q_index); + } else { + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + /* coverity[check_return] */ + xdp_rxq_info_reg(&ring->xdp_rxq, + ring->netdev, + ring->q_index, ring->q_vector->napi.napi_id); + + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (err) + return err; + } + } + + err = ice_setup_rx_ctx(ring); + if (err) { + dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n", + ring->q_index, err); + return err; + } + if (ring->xsk_pool) { bool ok; @@ -470,9 +502,13 @@ int ice_setup_rx_ctx(struct ice_ring *ring) } ok = ice_alloc_rx_bufs_zc(ring, num_bufs); - if (!ok) + if (!ok) { + u16 pf_q = ring->vsi->rxq_map[ring->q_index]; + dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n", ring->q_index, pf_q); + } + return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h index 44efdb627043..20e1c29aa68a 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.h +++ b/drivers/net/ethernet/intel/ice/ice_base.h @@ -6,7 +6,7 @@ #include "ice.h" -int ice_setup_rx_ctx(struct ice_ring *ring); +int ice_vsi_cfg_rxq(struct ice_ring *ring); int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg); int ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait); diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index e93b1e40f627..2fb81e359cdf 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2,6 +2,7 @@ /* Copyright (c) 2018, Intel Corporation. */ #include "ice_common.h" +#include "ice_lib.h" #include "ice_sched.h" #include "ice_adminq_cmd.h" #include "ice_flow.h" @@ -58,6 +59,17 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) } /** + * ice_is_e810 + * @hw: pointer to the hardware structure + * + * returns true if the device is E810 based, false if not. + */ +bool ice_is_e810(struct ice_hw *hw) +{ + return hw->mac_type == ICE_MAC_E810; +} + +/** * ice_clear_pf_cfg - Clear PF configuration * @hw: pointer to the hardware structure * @@ -424,6 +436,7 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, li->phy_type_high = le64_to_cpu(link_data.phy_type_high); *hw_media_type = ice_get_media_type(pi); li->link_info = link_data.link_info; + li->link_cfg_err = link_data.link_cfg_err; li->an_info = link_data.an_info; li->ext_info = link_data.ext_info; li->max_frame_size = le16_to_cpu(link_data.max_frame_size); @@ -454,6 +467,7 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, (unsigned long long)li->phy_type_high); ice_debug(hw, ICE_DBG_LINK, " media_type = 0x%x\n", *hw_media_type); ice_debug(hw, ICE_DBG_LINK, " link_info = 0x%x\n", li->link_info); + ice_debug(hw, ICE_DBG_LINK, " link_cfg_err = 0x%x\n", li->link_cfg_err); ice_debug(hw, ICE_DBG_LINK, " an_info = 0x%x\n", li->an_info); ice_debug(hw, ICE_DBG_LINK, " ext_info = 0x%x\n", li->ext_info); ice_debug(hw, ICE_DBG_LINK, " fec_info = 0x%x\n", li->fec_info); @@ -1062,7 +1076,8 @@ enum ice_status ice_check_reset(struct ice_hw *hw) GLNVM_ULD_POR_DONE_1_M |\ GLNVM_ULD_PCIER_DONE_2_M) - uld_mask = ICE_RESET_DONE_MASK; + uld_mask = ICE_RESET_DONE_MASK | (hw->func_caps.common_cap.rdma ? + GLNVM_ULD_PE_DONE_M : 0); /* Device is Active; check Global Reset processes are done */ for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) { @@ -1289,6 +1304,64 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { { 0 } }; +/* Sideband Queue command wrappers */ + +/** + * ice_sbq_send_cmd - send Sideband Queue command to Sideband Queue + * @hw: pointer to the HW struct + * @desc: descriptor describing the command + * @buf: buffer to use for indirect commands (NULL for direct commands) + * @buf_size: size of buffer for indirect commands (0 for direct commands) + * @cd: pointer to command details structure + */ +static int +ice_sbq_send_cmd(struct ice_hw *hw, struct ice_sbq_cmd_desc *desc, + void *buf, u16 buf_size, struct ice_sq_cd *cd) +{ + return ice_status_to_errno(ice_sq_send_cmd(hw, ice_get_sbq(hw), + (struct ice_aq_desc *)desc, + buf, buf_size, cd)); +} + +/** + * ice_sbq_rw_reg - Fill Sideband Queue command + * @hw: pointer to the HW struct + * @in: message info to be filled in descriptor + */ +int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in) +{ + struct ice_sbq_cmd_desc desc = {0}; + struct ice_sbq_msg_req msg = {0}; + u16 msg_len; + int status; + + msg_len = sizeof(msg); + + msg.dest_dev = in->dest_dev; + msg.opcode = in->opcode; + msg.flags = ICE_SBQ_MSG_FLAGS; + msg.sbe_fbe = ICE_SBQ_MSG_SBE_FBE; + msg.msg_addr_low = cpu_to_le16(in->msg_addr_low); + msg.msg_addr_high = cpu_to_le32(in->msg_addr_high); + + if (in->opcode) + msg.data = cpu_to_le32(in->data); + else + /* data read comes back in completion, so shorten the struct by + * sizeof(msg.data) + */ + msg_len -= sizeof(msg.data); + + desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD); + desc.opcode = cpu_to_le16(ice_sbq_opc_neigh_dev_req); + desc.param0.cmd_len = cpu_to_le16(msg_len); + status = ice_sbq_send_cmd(hw, &desc, &msg, msg_len, NULL); + if (!status && !in->opcode) + in->data = le32_to_cpu + (((struct ice_sbq_msg_cmpl *)&msg)->data); + return status; +} + /* FW Admin Queue command wrappers */ /* Software lock/mutex that is meant to be held while the Global Config Lock @@ -1938,6 +2011,10 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps, ice_debug(hw, ICE_DBG_INIT, "%s: nvm_unified_update = %d\n", prefix, caps->nvm_unified_update); break; + case ICE_AQC_CAPS_RDMA: + caps->rdma = (number == 1); + ice_debug(hw, ICE_DBG_INIT, "%s: rdma = %d\n", prefix, caps->rdma); + break; case ICE_AQC_CAPS_MAX_MTU: caps->max_mtu = number; ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n", @@ -1971,6 +2048,16 @@ ice_recalc_port_limited_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps) caps->maxtc = 4; ice_debug(hw, ICE_DBG_INIT, "reducing maxtc to %d (based on #ports)\n", caps->maxtc); + if (caps->rdma) { + ice_debug(hw, ICE_DBG_INIT, "forcing RDMA off\n"); + caps->rdma = 0; + } + + /* print message only when processing device capabilities + * during initialization. + */ + if (caps == &hw->dev_caps.common_cap) + dev_info(ice_hw_to_dev(hw), "RDMA functionality is not available with the current device configuration.\n"); } } @@ -2017,6 +2104,48 @@ ice_parse_vsi_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, } /** + * ice_parse_1588_func_caps - Parse ICE_AQC_CAPS_1588 function caps + * @hw: pointer to the HW struct + * @func_p: pointer to function capabilities structure + * @cap: pointer to the capability element to parse + * + * Extract function capabilities for ICE_AQC_CAPS_1588. + */ +static void +ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, + struct ice_aqc_list_caps_elem *cap) +{ + struct ice_ts_func_info *info = &func_p->ts_func_info; + u32 number = le32_to_cpu(cap->number); + + info->ena = ((number & ICE_TS_FUNC_ENA_M) != 0); + func_p->common_cap.ieee_1588 = info->ena; + + info->src_tmr_owned = ((number & ICE_TS_SRC_TMR_OWND_M) != 0); + info->tmr_ena = ((number & ICE_TS_TMR_ENA_M) != 0); + info->tmr_index_owned = ((number & ICE_TS_TMR_IDX_OWND_M) != 0); + info->tmr_index_assoc = ((number & ICE_TS_TMR_IDX_ASSOC_M) != 0); + + info->clk_freq = (number & ICE_TS_CLK_FREQ_M) >> ICE_TS_CLK_FREQ_S; + info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0); + + ice_debug(hw, ICE_DBG_INIT, "func caps: ieee_1588 = %u\n", + func_p->common_cap.ieee_1588); + ice_debug(hw, ICE_DBG_INIT, "func caps: src_tmr_owned = %u\n", + info->src_tmr_owned); + ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_ena = %u\n", + info->tmr_ena); + ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_index_owned = %u\n", + info->tmr_index_owned); + ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_index_assoc = %u\n", + info->tmr_index_assoc); + ice_debug(hw, ICE_DBG_INIT, "func caps: clk_freq = %u\n", + info->clk_freq); + ice_debug(hw, ICE_DBG_INIT, "func caps: clk_src = %u\n", + info->clk_src); +} + +/** * ice_parse_fdir_func_caps - Parse ICE_AQC_CAPS_FD function caps * @hw: pointer to the HW struct * @func_p: pointer to function capabilities structure @@ -2082,6 +2211,9 @@ ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, case ICE_AQC_CAPS_VSI: ice_parse_vsi_func_caps(hw, func_p, &cap_resp[i]); break; + case ICE_AQC_CAPS_1588: + ice_parse_1588_func_caps(hw, func_p, &cap_resp[i]); + break; case ICE_AQC_CAPS_FD: ice_parse_fdir_func_caps(hw, func_p); break; @@ -2155,6 +2287,57 @@ ice_parse_vsi_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, } /** + * ice_parse_1588_dev_caps - Parse ICE_AQC_CAPS_1588 device caps + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @cap: capability element to parse + * + * Parse ICE_AQC_CAPS_1588 for device capabilities. + */ +static void +ice_parse_1588_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, + struct ice_aqc_list_caps_elem *cap) +{ + struct ice_ts_dev_info *info = &dev_p->ts_dev_info; + u32 logical_id = le32_to_cpu(cap->logical_id); + u32 phys_id = le32_to_cpu(cap->phys_id); + u32 number = le32_to_cpu(cap->number); + + info->ena = ((number & ICE_TS_DEV_ENA_M) != 0); + dev_p->common_cap.ieee_1588 = info->ena; + + info->tmr0_owner = number & ICE_TS_TMR0_OWNR_M; + info->tmr0_owned = ((number & ICE_TS_TMR0_OWND_M) != 0); + info->tmr0_ena = ((number & ICE_TS_TMR0_ENA_M) != 0); + + info->tmr1_owner = (number & ICE_TS_TMR1_OWNR_M) >> ICE_TS_TMR1_OWNR_S; + info->tmr1_owned = ((number & ICE_TS_TMR1_OWND_M) != 0); + info->tmr1_ena = ((number & ICE_TS_TMR1_ENA_M) != 0); + + info->ena_ports = logical_id; + info->tmr_own_map = phys_id; + + ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 = %u\n", + dev_p->common_cap.ieee_1588); + ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_owner = %u\n", + info->tmr0_owner); + ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_owned = %u\n", + info->tmr0_owned); + ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_ena = %u\n", + info->tmr0_ena); + ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_owner = %u\n", + info->tmr1_owner); + ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_owned = %u\n", + info->tmr1_owned); + ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_ena = %u\n", + info->tmr1_ena); + ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 ena_ports = %u\n", + info->ena_ports); + ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr_own_map = %u\n", + info->tmr_own_map); +} + +/** * ice_parse_fdir_dev_caps - Parse ICE_AQC_CAPS_FD device caps * @hw: pointer to the HW struct * @dev_p: pointer to device capabilities structure @@ -2215,6 +2398,9 @@ ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, case ICE_AQC_CAPS_VSI: ice_parse_vsi_dev_caps(hw, dev_p, &cap_resp[i]); break; + case ICE_AQC_CAPS_1588: + ice_parse_1588_dev_caps(hw, dev_p, &cap_resp[i]); + break; case ICE_AQC_CAPS_FD: ice_parse_fdir_dev_caps(hw, dev_p, &cap_resp[i]); break; @@ -3635,6 +3821,52 @@ do_aq: return status; } +/** + * ice_aq_add_rdma_qsets + * @hw: pointer to the hardware structure + * @num_qset_grps: Number of RDMA Qset groups + * @qset_list: list of Qset groups to be added + * @buf_size: size of buffer for indirect command + * @cd: pointer to command details structure or NULL + * + * Add Tx RDMA Qsets (0x0C33) + */ +static int +ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps, + struct ice_aqc_add_rdma_qset_data *qset_list, + u16 buf_size, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_rdma_qset_data *list; + struct ice_aqc_add_rdma_qset *cmd; + struct ice_aq_desc desc; + u16 i, sum_size = 0; + + cmd = &desc.params.add_rdma_qset; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_rdma_qset); + + if (num_qset_grps > ICE_LAN_TXQ_MAX_QGRPS) + return -EINVAL; + + for (i = 0, list = qset_list; i < num_qset_grps; i++) { + u16 num_qsets = le16_to_cpu(list->num_qsets); + + sum_size += struct_size(list, rdma_qsets, num_qsets); + list = (struct ice_aqc_add_rdma_qset_data *)(list->rdma_qsets + + num_qsets); + } + + if (buf_size != sum_size) + return -EINVAL; + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + cmd->num_qset_grps = num_qset_grps; + + return ice_status_to_errno(ice_aq_send_cmd(hw, &desc, qset_list, + buf_size, cd)); +} + /* End of FW Admin Queue command wrappers */ /** @@ -4133,6 +4365,162 @@ ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap, } /** + * ice_cfg_vsi_rdma - configure the VSI RDMA queues + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc_bitmap: TC bitmap + * @max_rdmaqs: max RDMA queues array per TC + * + * This function adds/updates the VSI RDMA queues per TC. + */ +int +ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap, + u16 *max_rdmaqs) +{ + return ice_status_to_errno(ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap, + max_rdmaqs, + ICE_SCHED_NODE_OWNER_RDMA)); +} + +/** + * ice_ena_vsi_rdma_qset + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc: TC number + * @rdma_qset: pointer to RDMA Qset + * @num_qsets: number of RDMA Qsets + * @qset_teid: pointer to Qset node TEIDs + * + * This function adds RDMA Qset + */ +int +ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 *rdma_qset, u16 num_qsets, u32 *qset_teid) +{ + struct ice_aqc_txsched_elem_data node = { 0 }; + struct ice_aqc_add_rdma_qset_data *buf; + struct ice_sched_node *parent; + enum ice_status status; + struct ice_hw *hw; + u16 i, buf_size; + int ret; + + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return -EIO; + hw = pi->hw; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return -EINVAL; + + buf_size = struct_size(buf, rdma_qsets, num_qsets); + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + mutex_lock(&pi->sched_lock); + + parent = ice_sched_get_free_qparent(pi, vsi_handle, tc, + ICE_SCHED_NODE_OWNER_RDMA); + if (!parent) { + ret = -EINVAL; + goto rdma_error_exit; + } + buf->parent_teid = parent->info.node_teid; + node.parent_teid = parent->info.node_teid; + + buf->num_qsets = cpu_to_le16(num_qsets); + for (i = 0; i < num_qsets; i++) { + buf->rdma_qsets[i].tx_qset_id = cpu_to_le16(rdma_qset[i]); + buf->rdma_qsets[i].info.valid_sections = + ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR | + ICE_AQC_ELEM_VALID_EIR; + buf->rdma_qsets[i].info.generic = 0; + buf->rdma_qsets[i].info.cir_bw.bw_profile_idx = + cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID); + buf->rdma_qsets[i].info.cir_bw.bw_alloc = + cpu_to_le16(ICE_SCHED_DFLT_BW_WT); + buf->rdma_qsets[i].info.eir_bw.bw_profile_idx = + cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID); + buf->rdma_qsets[i].info.eir_bw.bw_alloc = + cpu_to_le16(ICE_SCHED_DFLT_BW_WT); + } + ret = ice_aq_add_rdma_qsets(hw, 1, buf, buf_size, NULL); + if (ret) { + ice_debug(hw, ICE_DBG_RDMA, "add RDMA qset failed\n"); + goto rdma_error_exit; + } + node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF; + for (i = 0; i < num_qsets; i++) { + node.node_teid = buf->rdma_qsets[i].qset_teid; + status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, + &node); + if (status) { + ret = ice_status_to_errno(status); + break; + } + qset_teid[i] = le32_to_cpu(node.node_teid); + } +rdma_error_exit: + mutex_unlock(&pi->sched_lock); + kfree(buf); + return ret; +} + +/** + * ice_dis_vsi_rdma_qset - free RDMA resources + * @pi: port_info struct + * @count: number of RDMA Qsets to free + * @qset_teid: TEID of Qset node + * @q_id: list of queue IDs being disabled + */ +int +ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid, + u16 *q_id) +{ + struct ice_aqc_dis_txq_item *qg_list; + enum ice_status status = 0; + struct ice_hw *hw; + u16 qg_size; + int i; + + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return -EIO; + + hw = pi->hw; + + qg_size = struct_size(qg_list, q_id, 1); + qg_list = kzalloc(qg_size, GFP_KERNEL); + if (!qg_list) + return -ENOMEM; + + mutex_lock(&pi->sched_lock); + + for (i = 0; i < count; i++) { + struct ice_sched_node *node; + + node = ice_sched_find_node_by_teid(pi->root, qset_teid[i]); + if (!node) + continue; + + qg_list->parent_teid = node->info.parent_teid; + qg_list->num_qs = 1; + qg_list->q_id[0] = + cpu_to_le16(q_id[i] | + ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET); + + status = ice_aq_dis_lan_txq(hw, 1, qg_list, qg_size, + ICE_NO_RESET, 0, NULL); + if (status) + break; + + ice_free_sched_node(pi, node); + } + + mutex_unlock(&pi->sched_lock); + kfree(qg_list); + return ice_status_to_errno(status); +} + +/** * ice_replay_pre_init - replay pre initialization * @hw: pointer to the HW struct * @@ -4304,6 +4692,81 @@ ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, } /** + * ice_aq_set_driver_param - Set driver parameter to share via firmware + * @hw: pointer to the HW struct + * @idx: parameter index to set + * @value: the value to set the parameter to + * @cd: pointer to command details structure or NULL + * + * Set the value of one of the software defined parameters. All PFs connected + * to this device can read the value using ice_aq_get_driver_param. + * + * Note that firmware provides no synchronization or locking, and will not + * save the parameter value during a device reset. It is expected that + * a single PF will write the parameter value, while all other PFs will only + * read it. + */ +int +ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, + u32 value, struct ice_sq_cd *cd) +{ + struct ice_aqc_driver_shared_params *cmd; + struct ice_aq_desc desc; + + if (idx >= ICE_AQC_DRIVER_PARAM_MAX) + return -EIO; + + cmd = &desc.params.drv_shared_params; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_shared_params); + + cmd->set_or_get_op = ICE_AQC_DRIVER_PARAM_SET; + cmd->param_indx = idx; + cmd->param_val = cpu_to_le32(value); + + return ice_status_to_errno(ice_aq_send_cmd(hw, &desc, NULL, 0, cd)); +} + +/** + * ice_aq_get_driver_param - Get driver parameter shared via firmware + * @hw: pointer to the HW struct + * @idx: parameter index to set + * @value: storage to return the shared parameter + * @cd: pointer to command details structure or NULL + * + * Get the value of one of the software defined parameters. + * + * Note that firmware provides no synchronization or locking. It is expected + * that only a single PF will write a given parameter. + */ +int +ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, + u32 *value, struct ice_sq_cd *cd) +{ + struct ice_aqc_driver_shared_params *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + if (idx >= ICE_AQC_DRIVER_PARAM_MAX) + return -EIO; + + cmd = &desc.params.drv_shared_params; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_shared_params); + + cmd->set_or_get_op = ICE_AQC_DRIVER_PARAM_GET; + cmd->param_indx = idx; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (status) + return ice_status_to_errno(status); + + *value = le32_to_cpu(cmd->param_val); + + return 0; +} + +/** * ice_fw_supports_link_override * @hw: pointer to the hardware structure * diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 7a9d2dfb21a2..fb16070f02e2 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -40,6 +40,8 @@ enum ice_status ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, enum ice_adminq_opc opc, struct ice_sq_cd *cd); +bool ice_is_sbq_supported(struct ice_hw *hw); +struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw); enum ice_status ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_aq_desc *desc, void *buf, u16 buf_size, @@ -97,6 +99,7 @@ ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high, enum ice_status ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags, struct ice_sq_cd *cd); +bool ice_is_e810(struct ice_hw *hw); enum ice_status ice_clear_pf_cfg(struct ice_hw *hw); enum ice_status ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi, @@ -147,6 +150,15 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, bool write, struct ice_sq_cd *cd); +int +ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap, + u16 *max_rdmaqs); +int +ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 *rdma_qset, u16 num_qsets, u32 *qset_teid); +int +ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid, + u16 *q_id); enum ice_status ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues, u16 *q_handle, u16 *q_ids, u32 *q_teids, @@ -164,6 +176,7 @@ void ice_replay_post(struct ice_hw *hw); void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf); struct ice_q_ctx * ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle); +int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in); void ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); @@ -173,6 +186,12 @@ ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, enum ice_status ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, struct ice_aqc_txsched_elem_data *buf); +int +ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, + u32 value, struct ice_sq_cd *cd); +int +ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, + u32 *value, struct ice_sq_cd *cd); enum ice_status ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, struct ice_sq_cd *cd); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 87b33bdd4960..03bdb125be36 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -52,6 +52,19 @@ static void ice_mailbox_init_regs(struct ice_hw *hw) } /** + * ice_sb_init_regs - Initialize Sideband registers + * @hw: pointer to the hardware structure + * + * This assumes the alloc_sq and alloc_rq functions have already been called + */ +static void ice_sb_init_regs(struct ice_hw *hw) +{ + struct ice_ctl_q_info *cq = &hw->sbq; + + ICE_CQ_INIT_REGS(cq, PF_SB); +} + +/** * ice_check_sq_alive * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue @@ -609,6 +622,10 @@ static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) ice_adminq_init_regs(hw); cq = &hw->adminq; break; + case ICE_CTL_Q_SB: + ice_sb_init_regs(hw); + cq = &hw->sbq; + break; case ICE_CTL_Q_MAILBOX: ice_mailbox_init_regs(hw); cq = &hw->mailboxq; @@ -646,6 +663,32 @@ init_ctrlq_free_sq: } /** + * ice_is_sbq_supported - is the sideband queue supported + * @hw: pointer to the hardware structure + * + * Returns true if the sideband control queue interface is + * supported for the device, false otherwise + */ +bool ice_is_sbq_supported(struct ice_hw *hw) +{ + /* The device sideband queue is only supported on devices with the + * generic MAC type. + */ + return hw->mac_type == ICE_MAC_GENERIC; +} + +/** + * ice_get_sbq - returns the right control queue to use for sideband + * @hw: pointer to the hardware structure + */ +struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw) +{ + if (ice_is_sbq_supported(hw)) + return &hw->sbq; + return &hw->adminq; +} + +/** * ice_shutdown_ctrlq - shutdown routine for any control queue * @hw: pointer to the hardware structure * @q_type: specific Control queue type @@ -662,6 +705,9 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) if (ice_check_sq_alive(hw, cq)) ice_aq_q_shutdown(hw, true); break; + case ICE_CTL_Q_SB: + cq = &hw->sbq; + break; case ICE_CTL_Q_MAILBOX: cq = &hw->mailboxq; break; @@ -685,6 +731,9 @@ void ice_shutdown_all_ctrlq(struct ice_hw *hw) { /* Shutdown FW admin queue */ ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN); + /* Shutdown PHY Sideband */ + if (ice_is_sbq_supported(hw)) + ice_shutdown_ctrlq(hw, ICE_CTL_Q_SB); /* Shutdown PF-VF Mailbox */ ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX); } @@ -724,6 +773,15 @@ enum ice_status ice_init_all_ctrlq(struct ice_hw *hw) if (status) return status; + /* sideband control queue (SBQ) interface is not supported on some + * devices. Initialize if supported, else fallback to the admin queue + * interface + */ + if (ice_is_sbq_supported(hw)) { + status = ice_init_ctrlq(hw, ICE_CTL_Q_SB); + if (status) + return status; + } /* Init Mailbox queue */ return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX); } @@ -759,6 +817,8 @@ static void ice_init_ctrlq_locks(struct ice_ctl_q_info *cq) enum ice_status ice_create_all_ctrlq(struct ice_hw *hw) { ice_init_ctrlq_locks(&hw->adminq); + if (ice_is_sbq_supported(hw)) + ice_init_ctrlq_locks(&hw->sbq); ice_init_ctrlq_locks(&hw->mailboxq); return ice_init_all_ctrlq(hw); @@ -791,6 +851,8 @@ void ice_destroy_all_ctrlq(struct ice_hw *hw) ice_shutdown_all_ctrlq(hw); ice_destroy_ctrlq_locks(&hw->adminq); + if (ice_is_sbq_supported(hw)) + ice_destroy_ctrlq_locks(&hw->sbq); ice_destroy_ctrlq_locks(&hw->mailboxq); } diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index fe75871e48ca..c07e9cc9fc6e 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -9,6 +9,7 @@ /* Maximum buffer lengths for all control queue types */ #define ICE_AQ_MAX_BUF_LEN 4096 #define ICE_MBXQ_MAX_BUF_LEN 4096 +#define ICE_SBQ_MAX_BUF_LEN 512 #define ICE_CTL_Q_DESC(R, i) \ (&(((struct ice_aq_desc *)((R).desc_buf.va))[i])) @@ -29,6 +30,7 @@ enum ice_ctl_q { ICE_CTL_Q_UNKNOWN = 0, ICE_CTL_Q_ADMIN, ICE_CTL_Q_MAILBOX, + ICE_CTL_Q_SB, }; /* Control Queue timeout settings - max delay 1s */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index df02cffdf209..926cf748c5ec 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -275,6 +275,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) struct ice_dcbx_cfg *old_cfg, *curr_cfg; struct device *dev = ice_pf_to_dev(pf); int ret = ICE_DCB_NO_HW_CHG; + struct iidc_event *event; struct ice_vsi *pf_vsi; curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; @@ -313,6 +314,17 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) goto free_cfg; } + /* Notify AUX drivers about impending change to TCs */ + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) { + ret = -ENOMEM; + goto free_cfg; + } + + set_bit(IIDC_EVENT_BEFORE_TC_CHANGE, event->type); + ice_send_event_to_aux(pf, event); + kfree(event); + /* avoid race conditions by holding the lock while disabling and * re-enabling the VSI */ @@ -640,6 +652,7 @@ static int ice_dcb_noncontig_cfg(struct ice_pf *pf) void ice_pf_dcb_recfg(struct ice_pf *pf) { struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + struct iidc_event *event; u8 tc_map = 0; int v, ret; @@ -675,6 +688,14 @@ void ice_pf_dcb_recfg(struct ice_pf *pf) if (vsi->type == ICE_VSI_PF) ice_dcbnl_set_all(vsi); } + /* Notify the AUX drivers that TC change is finished */ + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return; + + set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type); + ice_send_event_to_aux(pf, event); + kfree(event); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index 35c21d9ae009..261b6e2ed7bc 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -60,7 +60,7 @@ static inline bool ice_is_dcb_active(struct ice_pf *pf) test_bit(ICE_FLAG_DCB_ENA, pf->flags)); } #else -#define ice_dcb_rebuild(pf) do {} while (0) +static inline void ice_dcb_rebuild(struct ice_pf *pf) { } static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg) { @@ -113,11 +113,12 @@ ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf, return false; } -#define ice_update_dcb_stats(pf) do {} while (0) -#define ice_pf_dcb_recfg(pf) do {} while (0) -#define ice_vsi_cfg_dcb_rings(vsi) do {} while (0) -#define ice_dcb_process_lldp_set_mib_change(pf, event) do {} while (0) -#define ice_set_cgd_num(tlan_ctx, ring) do {} while (0) -#define ice_vsi_cfg_netdev_tc(vsi, ena_tc) do {} while (0) +static inline void ice_pf_dcb_recfg(struct ice_pf *pf) { } +static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { } +static inline void ice_update_dcb_stats(struct ice_pf *pf) { } +static inline void +ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { } +static inline void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) { } +static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring) { } #endif /* CONFIG_DCB */ #endif /* _ICE_DCB_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h index 6c630a362293..eac2f34bdcdd 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h @@ -11,9 +11,10 @@ void ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, struct ice_dcbx_cfg *new_cfg); #else -#define ice_dcbnl_setup(vsi) do {} while (0) -#define ice_dcbnl_set_all(vsi) do {} while (0) -#define ice_dcbnl_flush_apps(pf, old_cfg, new_cfg) do {} while (0) +static inline void ice_dcbnl_setup(struct ice_vsi *vsi) { } +static inline void ice_dcbnl_set_all(struct ice_vsi *vsi) { } +static inline void +ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, + struct ice_dcbx_cfg *new_cfg) { } #endif /* CONFIG_DCB */ - #endif /* _ICE_DCB_NL_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index cf685eeea198..91b545ab8b8f 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -276,6 +276,12 @@ static int ice_devlink_info_get(struct devlink *devlink, size_t i; int err; + err = ice_wait_for_reset(pf, 10 * HZ); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Device is busy resetting"); + return err; + } + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -283,6 +289,9 @@ static int ice_devlink_info_get(struct devlink *devlink, /* discover capabilities first */ status = ice_discover_dev_caps(hw, &ctx->dev_caps); if (status) { + dev_dbg(dev, "Failed to discover device capabilities, status %s aq_err %s\n", + ice_stat_str(status), ice_aq_str(hw->adminq.sq_last_status)); + NL_SET_ERR_MSG_MOD(extack, "Unable to discover device capabilities"); err = -EIO; goto out_free_ctx; } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 99301ad95290..d95a5daca114 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3195,6 +3195,31 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, return 0; } +static int +ice_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +{ + struct ice_pf *pf = ice_netdev_to_pf(dev); + + /* only report timestamping if PTP is enabled */ + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return ethtool_op_get_ts_info(dev, info); + + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->phc_index = ice_get_ptp_clock_index(pf); + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + /** * ice_get_max_txq - return the maximum number of Tx queues for in a PF * @pf: PF structure @@ -3462,13 +3487,9 @@ static int ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, struct ice_ring_container *rc) { - struct ice_pf *pf; - if (!rc->ring) return -EINVAL; - pf = rc->ring->vsi->back; - switch (c_type) { case ICE_RX_CONTAINER: ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc); @@ -3480,7 +3501,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, ec->tx_coalesce_usecs = rc->itr_setting; break; default: - dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type); + dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", c_type); return -EINVAL; } @@ -3990,7 +4011,7 @@ static const struct ethtool_ops ice_ethtool_ops = { .set_rxfh = ice_set_rxfh, .get_channels = ice_get_channels, .set_channels = ice_set_channels, - .get_ts_info = ethtool_op_get_ts_info, + .get_ts_info = ice_get_ts_info, .get_per_queue_coalesce = ice_get_per_q_coalesce, .set_per_queue_coalesce = ice_set_per_q_coalesce, .get_fecparam = ice_get_fecparam, diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index dcec0360ce55..f8601d5b0b19 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -702,6 +702,16 @@ int ice_flash_pldm_image(struct ice_pf *pf, const struct firmware *fw, } err = pldmfw_flash_image(&priv.context, fw); + if (err == -ENOENT) { + dev_err(dev, "Firmware image has no record matching this device\n"); + NL_SET_ERR_MSG_MOD(extack, "Firmware image has no record matching this device"); + } else if (err) { + /* Do not set a generic extended ACK message here. A more + * specific message may already have been set by one of our + * ops. + */ + dev_err(dev, "Failed to flash PLDM image, err %d", err); + } ice_release_nvm(hw); diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 9b8300d4a267..76021d977b60 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -52,6 +52,54 @@ #define PF_MBX_ATQLEN_ATQCRIT_M BIT(30) #define PF_MBX_ATQLEN_ATQENABLE_M BIT(31) #define PF_MBX_ATQT 0x0022E300 +#define PF_SB_ARQBAH 0x0022FF00 +#define PF_SB_ARQBAH_ARQBAH_S 0 +#define PF_SB_ARQBAH_ARQBAH_M ICE_M(0xFFFFFFFF, 0) +#define PF_SB_ARQBAL 0x0022FE80 +#define PF_SB_ARQBAL_ARQBAL_LSB_S 0 +#define PF_SB_ARQBAL_ARQBAL_LSB_M ICE_M(0x3F, 0) +#define PF_SB_ARQBAL_ARQBAL_S 6 +#define PF_SB_ARQBAL_ARQBAL_M ICE_M(0x3FFFFFF, 6) +#define PF_SB_ARQH 0x00230000 +#define PF_SB_ARQH_ARQH_S 0 +#define PF_SB_ARQH_ARQH_M ICE_M(0x3FF, 0) +#define PF_SB_ARQLEN 0x0022FF80 +#define PF_SB_ARQLEN_ARQLEN_S 0 +#define PF_SB_ARQLEN_ARQLEN_M ICE_M(0x3FF, 0) +#define PF_SB_ARQLEN_ARQVFE_S 28 +#define PF_SB_ARQLEN_ARQVFE_M BIT(28) +#define PF_SB_ARQLEN_ARQOVFL_S 29 +#define PF_SB_ARQLEN_ARQOVFL_M BIT(29) +#define PF_SB_ARQLEN_ARQCRIT_S 30 +#define PF_SB_ARQLEN_ARQCRIT_M BIT(30) +#define PF_SB_ARQLEN_ARQENABLE_S 31 +#define PF_SB_ARQLEN_ARQENABLE_M BIT(31) +#define PF_SB_ARQT 0x00230080 +#define PF_SB_ARQT_ARQT_S 0 +#define PF_SB_ARQT_ARQT_M ICE_M(0x3FF, 0) +#define PF_SB_ATQBAH 0x0022FC80 +#define PF_SB_ATQBAH_ATQBAH_S 0 +#define PF_SB_ATQBAH_ATQBAH_M ICE_M(0xFFFFFFFF, 0) +#define PF_SB_ATQBAL 0x0022FC00 +#define PF_SB_ATQBAL_ATQBAL_S 6 +#define PF_SB_ATQBAL_ATQBAL_M ICE_M(0x3FFFFFF, 6) +#define PF_SB_ATQH 0x0022FD80 +#define PF_SB_ATQH_ATQH_S 0 +#define PF_SB_ATQH_ATQH_M ICE_M(0x3FF, 0) +#define PF_SB_ATQLEN 0x0022FD00 +#define PF_SB_ATQLEN_ATQLEN_S 0 +#define PF_SB_ATQLEN_ATQLEN_M ICE_M(0x3FF, 0) +#define PF_SB_ATQLEN_ATQVFE_S 28 +#define PF_SB_ATQLEN_ATQVFE_M BIT(28) +#define PF_SB_ATQLEN_ATQOVFL_S 29 +#define PF_SB_ATQLEN_ATQOVFL_M BIT(29) +#define PF_SB_ATQLEN_ATQCRIT_S 30 +#define PF_SB_ATQLEN_ATQCRIT_M BIT(30) +#define PF_SB_ATQLEN_ATQENABLE_S 31 +#define PF_SB_ATQLEN_ATQENABLE_M BIT(31) +#define PF_SB_ATQT 0x0022FE00 +#define PF_SB_ATQT_ATQT_S 0 +#define PF_SB_ATQT_ATQT_M ICE_M(0x3FF, 0) #define PRTDCB_GENC 0x00083000 #define PRTDCB_GENC_PFCLDA_S 16 #define PRTDCB_GENC_PFCLDA_M ICE_M(0xFFFF, 16) @@ -90,6 +138,10 @@ #define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S 4 #define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M ICE_M(0x3, 4) #define GLGEN_CLKSTAT_SRC 0x000B826C +#define GLGEN_GPIO_CTL(_i) (0x000880C8 + ((_i) * 4)) +#define GLGEN_GPIO_CTL_PIN_DIR_M BIT(4) +#define GLGEN_GPIO_CTL_PIN_FUNC_S 8 +#define GLGEN_GPIO_CTL_PIN_FUNC_M ICE_M(0xF, 8) #define GLGEN_RSTAT 0x000B8188 #define GLGEN_RSTAT_DEVSTATE_M ICE_M(0x3, 0) #define GLGEN_RSTCTL 0x000B8180 @@ -111,8 +163,6 @@ #define VPGEN_VFRSTAT_VFRD_M BIT(0) #define VPGEN_VFRTRIG(_VF) (0x00090000 + ((_VF) * 4)) #define VPGEN_VFRTRIG_VFSWR_M BIT(0) -#define PFHMC_ERRORDATA 0x00520500 -#define PFHMC_ERRORINFO 0x00520400 #define GLINT_CTL 0x0016CC54 #define GLINT_CTL_DIS_AUTOMASK_M BIT(0) #define GLINT_CTL_ITR_GRAN_200_S 16 @@ -156,11 +206,14 @@ #define PFINT_MBX_CTL_ITR_INDX_M ICE_M(0x3, 11) #define PFINT_MBX_CTL_CAUSE_ENA_M BIT(30) #define PFINT_OICR 0x0016CA00 +#define PFINT_OICR_TSYN_TX_M BIT(11) +#define PFINT_OICR_TSYN_EVNT_M BIT(12) #define PFINT_OICR_ECC_ERR_M BIT(16) #define PFINT_OICR_MAL_DETECT_M BIT(19) #define PFINT_OICR_GRST_M BIT(20) #define PFINT_OICR_PCI_EXCEPTION_M BIT(21) #define PFINT_OICR_HMC_ERR_M BIT(26) +#define PFINT_OICR_PE_PUSH_M BIT(27) #define PFINT_OICR_PE_CRITERR_M BIT(28) #define PFINT_OICR_VFLR_M BIT(29) #define PFINT_OICR_SWINT_M BIT(31) @@ -170,6 +223,9 @@ #define PFINT_OICR_CTL_ITR_INDX_M ICE_M(0x3, 11) #define PFINT_OICR_CTL_CAUSE_ENA_M BIT(30) #define PFINT_OICR_ENA 0x0016C900 +#define PFINT_SB_CTL 0x0016B600 +#define PFINT_SB_CTL_MSIX_INDX_M ICE_M(0x7FF, 0) +#define PFINT_SB_CTL_CAUSE_ENA_M BIT(30) #define QINT_RQCTL(_QRX) (0x00150000 + ((_QRX) * 4)) #define QINT_RQCTL_MSIX_INDX_S 0 #define QINT_RQCTL_MSIX_INDX_M ICE_M(0x7FF, 0) @@ -383,6 +439,36 @@ #define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8)) #define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8)) #define PRTRPB_RDPC 0x000AC260 +#define GLTSYN_AUX_IN_0(_i) (0x000889D8 + ((_i) * 4)) +#define GLTSYN_AUX_IN_0_INT_ENA_M BIT(4) +#define GLTSYN_AUX_OUT_0(_i) (0x00088998 + ((_i) * 4)) +#define GLTSYN_AUX_OUT_0_OUT_ENA_M BIT(0) +#define GLTSYN_AUX_OUT_0_OUTMOD_M ICE_M(0x3, 1) +#define GLTSYN_CLKO_0(_i) (0x000889B8 + ((_i) * 4)) +#define GLTSYN_CMD 0x00088810 +#define GLTSYN_CMD_SYNC 0x00088814 +#define GLTSYN_ENA(_i) (0x00088808 + ((_i) * 4)) +#define GLTSYN_ENA_TSYN_ENA_M BIT(0) +#define GLTSYN_EVNT_H_0(_i) (0x00088970 + ((_i) * 4)) +#define GLTSYN_EVNT_L_0(_i) (0x00088968 + ((_i) * 4)) +#define GLTSYN_INCVAL_H(_i) (0x00088920 + ((_i) * 4)) +#define GLTSYN_INCVAL_L(_i) (0x00088918 + ((_i) * 4)) +#define GLTSYN_SHADJ_H(_i) (0x00088910 + ((_i) * 4)) +#define GLTSYN_SHADJ_L(_i) (0x00088908 + ((_i) * 4)) +#define GLTSYN_SHTIME_0(_i) (0x000888E0 + ((_i) * 4)) +#define GLTSYN_SHTIME_H(_i) (0x000888F0 + ((_i) * 4)) +#define GLTSYN_SHTIME_L(_i) (0x000888E8 + ((_i) * 4)) +#define GLTSYN_STAT(_i) (0x000888C0 + ((_i) * 4)) +#define GLTSYN_STAT_EVENT0_M BIT(0) +#define GLTSYN_STAT_EVENT1_M BIT(1) +#define GLTSYN_STAT_EVENT2_M BIT(2) +#define GLTSYN_SYNC_DLAY 0x00088818 +#define GLTSYN_TGT_H_0(_i) (0x00088930 + ((_i) * 4)) +#define GLTSYN_TGT_L_0(_i) (0x00088928 + ((_i) * 4)) +#define GLTSYN_TIME_H(_i) (0x000888D8 + ((_i) * 4)) +#define GLTSYN_TIME_L(_i) (0x000888D0 + ((_i) * 4)) +#define PFTSYN_SEM 0x00088880 +#define PFTSYN_SEM_BUSY_M BIT(0) #define VSIQF_FD_CNT(_VSI) (0x00464000 + ((_VSI) * 4)) #define VSIQF_FD_CNT_FD_GCNT_S 0 #define VSIQF_FD_CNT_FD_GCNT_M ICE_M(0x3FFF, 0) diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c new file mode 100644 index 000000000000..1f2afdf6cd48 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021, Intel Corporation. */ + +/* Inter-Driver Communication */ +#include "ice.h" +#include "ice_lib.h" +#include "ice_dcb_lib.h" + +/** + * ice_get_auxiliary_drv - retrieve iidc_auxiliary_drv struct + * @pf: pointer to PF struct + * + * This function has to be called with a device_lock on the + * pf->adev.dev to avoid race conditions. + */ +static struct iidc_auxiliary_drv *ice_get_auxiliary_drv(struct ice_pf *pf) +{ + struct auxiliary_device *adev; + + adev = pf->adev; + if (!adev || !adev->dev.driver) + return NULL; + + return container_of(adev->dev.driver, struct iidc_auxiliary_drv, + adrv.driver); +} + +/** + * ice_send_event_to_aux - send event to RDMA AUX driver + * @pf: pointer to PF struct + * @event: event struct + */ +void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event) +{ + struct iidc_auxiliary_drv *iadrv; + + if (!pf->adev) + return; + + device_lock(&pf->adev->dev); + iadrv = ice_get_auxiliary_drv(pf); + if (iadrv && iadrv->event_handler) + iadrv->event_handler(pf, event); + device_unlock(&pf->adev->dev); +} + +/** + * ice_find_vsi - Find the VSI from VSI ID + * @pf: The PF pointer to search in + * @vsi_num: The VSI ID to search for + */ +static struct ice_vsi *ice_find_vsi(struct ice_pf *pf, u16 vsi_num) +{ + int i; + + ice_for_each_vsi(pf, i) + if (pf->vsi[i] && pf->vsi[i]->vsi_num == vsi_num) + return pf->vsi[i]; + return NULL; +} + +/** + * ice_add_rdma_qset - Add Leaf Node for RDMA Qset + * @pf: PF struct + * @qset: Resource to be allocated + */ +int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset) +{ + u16 max_rdmaqs[ICE_MAX_TRAFFIC_CLASS]; + struct ice_vsi *vsi; + struct device *dev; + u32 qset_teid; + u16 qs_handle; + int status; + int i; + + if (WARN_ON(!pf || !qset)) + return -EINVAL; + + dev = ice_pf_to_dev(pf); + + if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + return -EINVAL; + + vsi = ice_get_main_vsi(pf); + if (!vsi) { + dev_err(dev, "RDMA QSet invalid VSI\n"); + return -EINVAL; + } + + ice_for_each_traffic_class(i) + max_rdmaqs[i] = 0; + + max_rdmaqs[qset->tc]++; + qs_handle = qset->qs_handle; + + status = ice_cfg_vsi_rdma(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, + max_rdmaqs); + if (status) { + dev_err(dev, "Failed VSI RDMA Qset config\n"); + return status; + } + + status = ice_ena_vsi_rdma_qset(vsi->port_info, vsi->idx, qset->tc, + &qs_handle, 1, &qset_teid); + if (status) { + dev_err(dev, "Failed VSI RDMA Qset enable\n"); + return status; + } + vsi->qset_handle[qset->tc] = qset->qs_handle; + qset->teid = qset_teid; + + return 0; +} +EXPORT_SYMBOL_GPL(ice_add_rdma_qset); + +/** + * ice_del_rdma_qset - Delete leaf node for RDMA Qset + * @pf: PF struct + * @qset: Resource to be freed + */ +int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset) +{ + struct ice_vsi *vsi; + u32 teid; + u16 q_id; + + if (WARN_ON(!pf || !qset)) + return -EINVAL; + + vsi = ice_find_vsi(pf, qset->vport_id); + if (!vsi) { + dev_err(ice_pf_to_dev(pf), "RDMA Invalid VSI\n"); + return -EINVAL; + } + + q_id = qset->qs_handle; + teid = qset->teid; + + vsi->qset_handle[qset->tc] = 0; + + return ice_dis_vsi_rdma_qset(vsi->port_info, 1, &teid, &q_id); +} +EXPORT_SYMBOL_GPL(ice_del_rdma_qset); + +/** + * ice_rdma_request_reset - accept request from RDMA to perform a reset + * @pf: struct for PF + * @reset_type: type of reset + */ +int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type) +{ + enum ice_reset_req reset; + + if (WARN_ON(!pf)) + return -EINVAL; + + switch (reset_type) { + case IIDC_PFR: + reset = ICE_RESET_PFR; + break; + case IIDC_CORER: + reset = ICE_RESET_CORER; + break; + case IIDC_GLOBR: + reset = ICE_RESET_GLOBR; + break; + default: + dev_err(ice_pf_to_dev(pf), "incorrect reset request\n"); + return -EINVAL; + } + + return ice_schedule_reset(pf, reset); +} +EXPORT_SYMBOL_GPL(ice_rdma_request_reset); + +/** + * ice_rdma_update_vsi_filter - update main VSI filters for RDMA + * @pf: pointer to struct for PF + * @vsi_id: VSI HW idx to update filter on + * @enable: bool whether to enable or disable filters + */ +int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable) +{ + struct ice_vsi *vsi; + int status; + + if (WARN_ON(!pf)) + return -EINVAL; + + vsi = ice_find_vsi(pf, vsi_id); + if (!vsi) + return -EINVAL; + + status = ice_cfg_rdma_fltr(&pf->hw, vsi->idx, enable); + if (status) { + dev_err(ice_pf_to_dev(pf), "Failed to %sable RDMA filtering\n", + enable ? "en" : "dis"); + } else { + if (enable) + vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; + else + vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; + } + + return status; +} +EXPORT_SYMBOL_GPL(ice_rdma_update_vsi_filter); + +/** + * ice_get_qos_params - parse QoS params for RDMA consumption + * @pf: pointer to PF struct + * @qos: set of QoS values + */ +void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) +{ + struct ice_dcbx_cfg *dcbx_cfg; + unsigned int i; + u32 up2tc; + + dcbx_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + up2tc = rd32(&pf->hw, PRTDCB_TUP2TC); + + qos->num_tc = ice_dcb_get_num_tc(dcbx_cfg); + for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++) + qos->up2tc[i] = (up2tc >> (i * 3)) & 0x7; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i]; +} +EXPORT_SYMBOL_GPL(ice_get_qos_params); + +/** + * ice_reserve_rdma_qvector - Reserve vector resources for RDMA driver + * @pf: board private structure to initialize + */ +static int ice_reserve_rdma_qvector(struct ice_pf *pf) +{ + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + int index; + + index = ice_get_res(pf, pf->irq_tracker, pf->num_rdma_msix, + ICE_RES_RDMA_VEC_ID); + if (index < 0) + return index; + pf->num_avail_sw_msix -= pf->num_rdma_msix; + pf->rdma_base_vector = (u16)index; + } + return 0; +} + +/** + * ice_adev_release - function to be mapped to AUX dev's release op + * @dev: pointer to device to free + */ +static void ice_adev_release(struct device *dev) +{ + struct iidc_auxiliary_dev *iadev; + + iadev = container_of(dev, struct iidc_auxiliary_dev, adev.dev); + kfree(iadev); +} + +/** + * ice_plug_aux_dev - allocate and register AUX device + * @pf: pointer to pf struct + */ +int ice_plug_aux_dev(struct ice_pf *pf) +{ + struct iidc_auxiliary_dev *iadev; + struct auxiliary_device *adev; + int ret; + + iadev = kzalloc(sizeof(*iadev), GFP_KERNEL); + if (!iadev) + return -ENOMEM; + + adev = &iadev->adev; + pf->adev = adev; + iadev->pf = pf; + + adev->id = pf->aux_idx; + adev->dev.release = ice_adev_release; + adev->dev.parent = &pf->pdev->dev; + adev->name = IIDC_RDMA_ROCE_NAME; + + ret = auxiliary_device_init(adev); + if (ret) { + pf->adev = NULL; + kfree(iadev); + return ret; + } + + ret = auxiliary_device_add(adev); + if (ret) { + pf->adev = NULL; + auxiliary_device_uninit(adev); + return ret; + } + + return 0; +} + +/* ice_unplug_aux_dev - unregister and free AUX device + * @pf: pointer to pf struct + */ +void ice_unplug_aux_dev(struct ice_pf *pf) +{ + if (!pf->adev) + return; + + auxiliary_device_delete(pf->adev); + auxiliary_device_uninit(pf->adev); + pf->adev = NULL; +} + +/** + * ice_init_rdma - initializes PF for RDMA use + * @pf: ptr to ice_pf + */ +int ice_init_rdma(struct ice_pf *pf) +{ + struct device *dev = &pf->pdev->dev; + int ret; + + /* Reserve vector resources */ + ret = ice_reserve_rdma_qvector(pf); + if (ret < 0) { + dev_err(dev, "failed to reserve vectors for RDMA\n"); + return ret; + } + + return ice_plug_aux_dev(pf); +} diff --git a/drivers/net/ethernet/intel/ice/ice_idc_int.h b/drivers/net/ethernet/intel/ice/ice_idc_int.h new file mode 100644 index 000000000000..b7796b8aecbd --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_idc_int.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021, Intel Corporation. */ + +#ifndef _ICE_IDC_INT_H_ +#define _ICE_IDC_INT_H_ + +#include <linux/net/intel/iidc.h> +#include "ice.h" + +struct ice_pf; + +void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event); + +#endif /* !_ICE_IDC_INT_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 4599fc3b4ed8..37c18c66b5c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -172,6 +172,7 @@ ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info) } ice_clear_sriov_cap(pf); + ice_clear_rdma_cap(pf); lag->bonded = true; lag->role = ICE_LAG_UNSET; @@ -222,6 +223,7 @@ ice_lag_unlink(struct ice_lag *lag, } ice_set_sriov_cap(pf); + ice_set_rdma_cap(pf); lag->bonded = false; lag->role = ICE_LAG_NONE; } diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 21329ed3087e..80736e0ec0dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -161,7 +161,6 @@ struct ice_fltr_desc { #define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES 0x1ULL struct ice_rx_ptype_decoded { - u32 ptype:10; u32 known:1; u32 outer_ip:1; u32 outer_ip_ver:2; @@ -606,9 +605,32 @@ struct ice_tlan_ctx { u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */ }; -/* macro to make the table lines short */ +/* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the + * hardware to a bit-field that can be used by SW to more easily determine the + * packet type. + * + * Macros are used to shorten the table lines and make this table human + * readable. + * + * We store the PTYPE in the top byte of the bit field - this is just so that + * we can check that the table doesn't have a row missing, as the index into + * the table should be the PTYPE. + * + * Typical work flow: + * + * IF NOT ice_ptype_lkup[ptype].known + * THEN + * Packet is unknown + * ELSE IF ice_ptype_lkup[ptype].outer_ip == ICE_RX_PTYPE_OUTER_IP + * Use the rest of the fields to look at the tunnels, inner protocols, etc + * ELSE + * Use the enum ice_rx_l2_ptype to decode the packet type + * ENDIF + */ + +/* macro to make the table lines short, use explicit indexing with [PTYPE] */ #define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - { PTYPE, \ + [PTYPE] = { \ 1, \ ICE_RX_PTYPE_OUTER_##OUTER_IP, \ ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \ @@ -619,18 +641,18 @@ struct ice_tlan_ctx { ICE_RX_PTYPE_INNER_PROT_##I, \ ICE_RX_PTYPE_PAYLOAD_LAYER_##PL } -#define ICE_PTT_UNUSED_ENTRY(PTYPE) { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* shorter macros makes the table fit but are terse */ #define ICE_RX_PTYPE_NOF ICE_RX_PTYPE_NOT_FRAG #define ICE_RX_PTYPE_FRG ICE_RX_PTYPE_FRAG -/* Lookup table mapping the HW PTYPE to the bit field for decoding */ -static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = { +/* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */ +static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = { /* L2 Packet types */ ICE_PTT_UNUSED_ENTRY(0), ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - ICE_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), + ICE_PTT_UNUSED_ENTRY(2), ICE_PTT_UNUSED_ENTRY(3), ICE_PTT_UNUSED_ENTRY(4), ICE_PTT_UNUSED_ENTRY(5), @@ -744,7 +766,7 @@ static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = { /* Non Tunneled IPv6 */ ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY3), + ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), ICE_PTT_UNUSED_ENTRY(91), ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), @@ -832,118 +854,7 @@ static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = { ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), /* unused entries */ - ICE_PTT_UNUSED_ENTRY(154), - ICE_PTT_UNUSED_ENTRY(155), - ICE_PTT_UNUSED_ENTRY(156), - ICE_PTT_UNUSED_ENTRY(157), - ICE_PTT_UNUSED_ENTRY(158), - ICE_PTT_UNUSED_ENTRY(159), - - ICE_PTT_UNUSED_ENTRY(160), - ICE_PTT_UNUSED_ENTRY(161), - ICE_PTT_UNUSED_ENTRY(162), - ICE_PTT_UNUSED_ENTRY(163), - ICE_PTT_UNUSED_ENTRY(164), - ICE_PTT_UNUSED_ENTRY(165), - ICE_PTT_UNUSED_ENTRY(166), - ICE_PTT_UNUSED_ENTRY(167), - ICE_PTT_UNUSED_ENTRY(168), - ICE_PTT_UNUSED_ENTRY(169), - - ICE_PTT_UNUSED_ENTRY(170), - ICE_PTT_UNUSED_ENTRY(171), - ICE_PTT_UNUSED_ENTRY(172), - ICE_PTT_UNUSED_ENTRY(173), - ICE_PTT_UNUSED_ENTRY(174), - ICE_PTT_UNUSED_ENTRY(175), - ICE_PTT_UNUSED_ENTRY(176), - ICE_PTT_UNUSED_ENTRY(177), - ICE_PTT_UNUSED_ENTRY(178), - ICE_PTT_UNUSED_ENTRY(179), - - ICE_PTT_UNUSED_ENTRY(180), - ICE_PTT_UNUSED_ENTRY(181), - ICE_PTT_UNUSED_ENTRY(182), - ICE_PTT_UNUSED_ENTRY(183), - ICE_PTT_UNUSED_ENTRY(184), - ICE_PTT_UNUSED_ENTRY(185), - ICE_PTT_UNUSED_ENTRY(186), - ICE_PTT_UNUSED_ENTRY(187), - ICE_PTT_UNUSED_ENTRY(188), - ICE_PTT_UNUSED_ENTRY(189), - - ICE_PTT_UNUSED_ENTRY(190), - ICE_PTT_UNUSED_ENTRY(191), - ICE_PTT_UNUSED_ENTRY(192), - ICE_PTT_UNUSED_ENTRY(193), - ICE_PTT_UNUSED_ENTRY(194), - ICE_PTT_UNUSED_ENTRY(195), - ICE_PTT_UNUSED_ENTRY(196), - ICE_PTT_UNUSED_ENTRY(197), - ICE_PTT_UNUSED_ENTRY(198), - ICE_PTT_UNUSED_ENTRY(199), - - ICE_PTT_UNUSED_ENTRY(200), - ICE_PTT_UNUSED_ENTRY(201), - ICE_PTT_UNUSED_ENTRY(202), - ICE_PTT_UNUSED_ENTRY(203), - ICE_PTT_UNUSED_ENTRY(204), - ICE_PTT_UNUSED_ENTRY(205), - ICE_PTT_UNUSED_ENTRY(206), - ICE_PTT_UNUSED_ENTRY(207), - ICE_PTT_UNUSED_ENTRY(208), - ICE_PTT_UNUSED_ENTRY(209), - - ICE_PTT_UNUSED_ENTRY(210), - ICE_PTT_UNUSED_ENTRY(211), - ICE_PTT_UNUSED_ENTRY(212), - ICE_PTT_UNUSED_ENTRY(213), - ICE_PTT_UNUSED_ENTRY(214), - ICE_PTT_UNUSED_ENTRY(215), - ICE_PTT_UNUSED_ENTRY(216), - ICE_PTT_UNUSED_ENTRY(217), - ICE_PTT_UNUSED_ENTRY(218), - ICE_PTT_UNUSED_ENTRY(219), - - ICE_PTT_UNUSED_ENTRY(220), - ICE_PTT_UNUSED_ENTRY(221), - ICE_PTT_UNUSED_ENTRY(222), - ICE_PTT_UNUSED_ENTRY(223), - ICE_PTT_UNUSED_ENTRY(224), - ICE_PTT_UNUSED_ENTRY(225), - ICE_PTT_UNUSED_ENTRY(226), - ICE_PTT_UNUSED_ENTRY(227), - ICE_PTT_UNUSED_ENTRY(228), - ICE_PTT_UNUSED_ENTRY(229), - - ICE_PTT_UNUSED_ENTRY(230), - ICE_PTT_UNUSED_ENTRY(231), - ICE_PTT_UNUSED_ENTRY(232), - ICE_PTT_UNUSED_ENTRY(233), - ICE_PTT_UNUSED_ENTRY(234), - ICE_PTT_UNUSED_ENTRY(235), - ICE_PTT_UNUSED_ENTRY(236), - ICE_PTT_UNUSED_ENTRY(237), - ICE_PTT_UNUSED_ENTRY(238), - ICE_PTT_UNUSED_ENTRY(239), - - ICE_PTT_UNUSED_ENTRY(240), - ICE_PTT_UNUSED_ENTRY(241), - ICE_PTT_UNUSED_ENTRY(242), - ICE_PTT_UNUSED_ENTRY(243), - ICE_PTT_UNUSED_ENTRY(244), - ICE_PTT_UNUSED_ENTRY(245), - ICE_PTT_UNUSED_ENTRY(246), - ICE_PTT_UNUSED_ENTRY(247), - ICE_PTT_UNUSED_ENTRY(248), - ICE_PTT_UNUSED_ENTRY(249), - - ICE_PTT_UNUSED_ENTRY(250), - ICE_PTT_UNUSED_ENTRY(251), - ICE_PTT_UNUSED_ENTRY(252), - ICE_PTT_UNUSED_ENTRY(253), - ICE_PTT_UNUSED_ENTRY(254), - ICE_PTT_UNUSED_ENTRY(255), + [154 ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 27f9dac8719c..dde9802c6c72 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -169,12 +169,13 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) switch (vsi->type) { case ICE_VSI_PF: - vsi->alloc_txq = min3(pf->num_lan_msix, - ice_get_avail_txq_count(pf), - (u16)num_online_cpus()); if (vsi->req_txq) { vsi->alloc_txq = vsi->req_txq; vsi->num_txq = vsi->req_txq; + } else { + vsi->alloc_txq = min3(pf->num_lan_msix, + ice_get_avail_txq_count(pf), + (u16)num_online_cpus()); } pf->num_lan_tx = vsi->alloc_txq; @@ -183,12 +184,13 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { vsi->alloc_rxq = 1; } else { - vsi->alloc_rxq = min3(pf->num_lan_msix, - ice_get_avail_rxq_count(pf), - (u16)num_online_cpus()); if (vsi->req_rxq) { vsi->alloc_rxq = vsi->req_rxq; vsi->num_rxq = vsi->req_rxq; + } else { + vsi->alloc_rxq = min3(pf->num_lan_msix, + ice_get_avail_rxq_count(pf), + (u16)num_online_cpus()); } } @@ -629,6 +631,17 @@ bool ice_is_safe_mode(struct ice_pf *pf) } /** + * ice_is_aux_ena + * @pf: pointer to the PF struct + * + * returns true if AUX devices/drivers are supported, false otherwise + */ +bool ice_is_aux_ena(struct ice_pf *pf) +{ + return test_bit(ICE_FLAG_AUX_ENA, pf->flags); +} + +/** * ice_vsi_clean_rss_flow_fld - Delete RSS configuration * @vsi: the VSI being cleaned up * @@ -1192,11 +1205,11 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - struct ice_vf *vf; int i; ice_for_each_vf(pf, i) { - vf = &pf->vf[i]; + struct ice_vf *vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) { base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; break; @@ -1285,6 +1298,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->reg_idx = vsi->txq_map[i]; ring->ring_active = false; ring->vsi = vsi; + ring->tx_tstamps = &pf->ptp.port.tx; ring->dev = dev; ring->count = vsi->num_tx_desc; WRITE_ONCE(vsi->tx_rings[i], ring); @@ -1662,9 +1676,11 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi) * @pf_q: index of the Rx queue in the PF's queue space * @rxdid: flexible descriptor RXDID * @prio: priority for the RXDID for this queue + * @ena_ts: true to enable timestamp and false to disable timestamp */ void -ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio) +ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, + bool ena_ts) { int regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); @@ -1679,9 +1695,40 @@ ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio) regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) & QRXFLXP_CNTXT_RXDID_PRIO_M; + if (ena_ts) + /* Enable TimeSync on this queue */ + regval |= QRXFLXP_CNTXT_TS_M; + wr32(hw, QRXFLXP_CNTXT(pf_q), regval); } +int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) +{ + if (q_idx >= vsi->num_rxq) + return -EINVAL; + + return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]); +} + +int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx) +{ + struct ice_aqc_add_tx_qgrp *qg_buf; + int err; + + if (q_idx >= vsi->alloc_txq || !tx_rings || !tx_rings[q_idx]) + return -EINVAL; + + qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL); + if (!qg_buf) + return -ENOMEM; + + qg_buf->num_txqs = 1; + + err = ice_vsi_cfg_txq(vsi, tx_rings[q_idx], qg_buf); + kfree(qg_buf); + return err; +} + /** * ice_vsi_cfg_rxqs - Configure the VSI for Rx * @vsi: the VSI being configured @@ -1699,15 +1746,11 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi) ice_vsi_cfg_frame_size(vsi); setup_rings: /* set up individual rings */ - for (i = 0; i < vsi->num_rxq; i++) { - int err; + ice_for_each_rxq(vsi, i) { + int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]); - err = ice_setup_rx_ctx(vsi->rx_rings[i]); - if (err) { - dev_err(ice_pf_to_dev(vsi->back), "ice_setup_rx_ctx failed for RxQ %d, err %d\n", - i, err); + if (err) return err; - } } return 0; @@ -2217,7 +2260,7 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) } if (status) - dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n", + dev_dbg(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n", create ? "adding" : "removing", tx ? "TX" : "RX", vsi->vsi_num, ice_stat_str(status)); } @@ -2832,11 +2875,11 @@ int ice_vsi_release(struct ice_vsi *vsi) * cleared in the same manner. */ if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - struct ice_vf *vf; int i; ice_for_each_vf(pf, i) { - vf = &pf->vf[i]; + struct ice_vf *vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) break; } @@ -3196,6 +3239,34 @@ bool ice_is_reset_in_progress(unsigned long *state) test_bit(ICE_GLOBR_REQ, state); } +/** + * ice_wait_for_reset - Wait for driver to finish reset and rebuild + * @pf: pointer to the PF structure + * @timeout: length of time to wait, in jiffies + * + * Wait (sleep) for a short time until the driver finishes cleaning up from + * a device reset. The caller must be able to sleep. Use this to delay + * operations that could fail while the driver is cleaning up after a device + * reset. + * + * Returns 0 on success, -EBUSY if the reset is not finished within the + * timeout, and -ERESTARTSYS if the thread was interrupted. + */ +int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout) +{ + long ret; + + ret = wait_event_interruptible_timeout(pf->reset_wait_queue, + !ice_is_reset_in_progress(pf->state), + timeout); + if (ret < 0) + return ret; + else if (!ret) + return -EBUSY; + else + return 0; +} + #ifdef CONFIG_DCB /** * ice_vsi_update_q_map - update our copy of the VSI info with new queue map @@ -3330,13 +3401,22 @@ int ice_status_to_errno(enum ice_status err) case ICE_ERR_DOES_NOT_EXIST: return -ENOENT; case ICE_ERR_OUT_OF_RANGE: - return -ENOTTY; + case ICE_ERR_AQ_ERROR: + case ICE_ERR_AQ_TIMEOUT: + case ICE_ERR_AQ_EMPTY: + case ICE_ERR_AQ_FW_CRITICAL: + return -EIO; case ICE_ERR_PARAM: + case ICE_ERR_INVAL_SIZE: return -EINVAL; case ICE_ERR_NO_MEMORY: return -ENOMEM; case ICE_ERR_MAX_LIMIT: return -EAGAIN; + case ICE_ERR_RESET_ONGOING: + return -EBUSY; + case ICE_ERR_AQ_FULL: + return -ENOSPC; default: return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 511c2316c40c..d5a28bf0fc2c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -12,6 +12,10 @@ bool ice_pf_state_is_nominal(struct ice_pf *pf); void ice_update_eth_stats(struct ice_vsi *vsi); +int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx); + +int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx); + int ice_vsi_cfg_rxqs(struct ice_vsi *vsi); int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi); @@ -73,9 +77,11 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id); int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi); bool ice_is_reset_in_progress(unsigned long *state); +int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout); void -ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio); +ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, + bool ena_ts); void ice_vsi_dis_irq(struct ice_vsi *vsi); @@ -102,7 +108,7 @@ enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); bool ice_is_safe_mode(struct ice_pf *pf); - +bool ice_is_aux_ena(struct ice_pf *pf); bool ice_is_dflt_vsi_in_use(struct ice_sw *sw); bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0eb2307325d3..ef8d1815af56 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -13,6 +13,12 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" #include "ice_devlink.h" +/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the + * ice tracepoint functions. This must be done exactly once across the + * ice driver. + */ +#define CREATE_TRACE_POINTS +#include "ice_trace.h" #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" static const char ice_driver_string[] = DRV_SUMMARY; @@ -35,6 +41,8 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXX MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); #endif /* !CONFIG_DYNAMIC_DEBUG */ +static DEFINE_IDA(ice_aux_ida); + static struct workqueue_struct *ice_wq; static const struct net_device_ops ice_netdev_safe_mode_ops; static const struct net_device_ops ice_netdev_ops; @@ -454,6 +462,8 @@ ice_prepare_for_reset(struct ice_pf *pf) if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; + ice_unplug_aux_dev(pf); + /* Notify VFs of impending reset */ if (ice_check_sq_alive(hw, &hw->mailboxq)) ice_vc_notify_reset(pf); @@ -467,6 +477,9 @@ ice_prepare_for_reset(struct ice_pf *pf) /* disable the VSIs and their queues that are not already DOWN */ ice_pf_dis_all_vsi(pf, false); + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_release(pf); + if (hw->port_info) ice_sched_clear_port(hw->port_info); @@ -499,6 +512,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) clear_bit(ICE_PFR_REQ, pf->state); clear_bit(ICE_CORER_REQ, pf->state); clear_bit(ICE_GLOBR_REQ, pf->state); + wake_up(&pf->reset_wait_queue); return; } @@ -511,6 +525,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) ice_rebuild(pf, reset_type); clear_bit(ICE_PREPARED_FOR_RESET, pf->state); clear_bit(ICE_PFR_REQ, pf->state); + wake_up(&pf->reset_wait_queue); ice_reset_all_vfs(pf, true); } } @@ -561,6 +576,7 @@ static void ice_reset_subtask(struct ice_pf *pf) clear_bit(ICE_PFR_REQ, pf->state); clear_bit(ICE_CORER_REQ, pf->state); clear_bit(ICE_GLOBR_REQ, pf->state); + wake_up(&pf->reset_wait_queue); ice_reset_all_vfs(pf, true); } @@ -858,6 +874,38 @@ static void ice_set_dflt_mib(struct ice_pf *pf) } /** + * ice_check_module_power + * @pf: pointer to PF struct + * @link_cfg_err: bitmap from the link info structure + * + * check module power level returned by a previous call to aq_get_link_info + * and print error messages if module power level is not supported + */ +static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err) +{ + /* if module power level is supported, clear the flag */ + if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT | + ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) { + clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags); + return; + } + + /* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the + * above block didn't clear this bit, there's nothing to do + */ + if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags)) + return; + + if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) { + dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n"); + set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags); + } else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) { + dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n"); + set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags); + } +} + +/** * ice_link_event - process the link event * @pf: PF that the link event is associated with * @pi: port_info for the port that the link event is associated with @@ -892,6 +940,8 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, pi->lport, ice_stat_str(status), ice_aq_str(pi->hw->adminq.sq_last_status)); + ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + /* Check if the link state is up after updating link info, and treat * this event as an UP event since the link is actually UP now. */ @@ -1190,6 +1240,10 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) cq = &hw->adminq; qtype = "Admin"; break; + case ICE_CTL_Q_SB: + cq = &hw->sbq; + qtype = "Sideband"; + break; case ICE_CTL_Q_MAILBOX: cq = &hw->mailboxq; qtype = "Mailbox"; @@ -1364,6 +1418,34 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf) } /** + * ice_clean_sbq_subtask - clean the Sideband Queue rings + * @pf: board private structure + */ +static void ice_clean_sbq_subtask(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + + /* Nothing to do here if sideband queue is not supported */ + if (!ice_is_sbq_supported(hw)) { + clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state); + return; + } + + if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state)) + return; + + if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB)) + return; + + clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state); + + if (ice_ctrlq_pending(hw, &hw->sbq)) + __ice_clean_ctrlq(pf, ICE_CTL_Q_SB); + + ice_flush(hw); +} + +/** * ice_service_task_schedule - schedule the service task to wake up * @pf: board private structure * @@ -2006,6 +2088,8 @@ static void ice_check_media_subtask(struct ice_pf *pf) if (err) return; + ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) ice_init_phy_user_cfg(pi); @@ -2063,6 +2147,7 @@ static void ice_service_task(struct work_struct *work) ice_process_vflr_event(pf); ice_clean_mailboxq_subtask(pf); + ice_clean_sbq_subtask(pf); ice_sync_arfs_fltrs(pf); ice_flush_fdir_ctx(pf); @@ -2078,6 +2163,7 @@ static void ice_service_task(struct work_struct *work) test_bit(ICE_VFLR_EVENT_PENDING, pf->state) || test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) || test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) || + test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) || test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) mod_timer(&pf->serv_tmr, jiffies); } @@ -2096,6 +2182,10 @@ static void ice_set_ctrlq_len(struct ice_hw *hw) hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN; hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN; hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN; + hw->sbq.num_rq_entries = ICE_SBQ_LEN; + hw->sbq.num_sq_entries = ICE_SBQ_LEN; + hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN; + hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN; } /** @@ -2118,6 +2208,8 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) return -EBUSY; } + ice_unplug_aux_dev(pf); + switch (reset) { case ICE_RESET_PFR: set_bit(ICE_PFR_REQ, pf->state); @@ -2622,6 +2714,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf) PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_VFLR_M | PFINT_OICR_HMC_ERR_M | + PFINT_OICR_PE_PUSH_M | PFINT_OICR_PE_CRITERR_M); wr32(hw, PFINT_OICR_ENA, val); @@ -2647,6 +2740,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) dev = ice_pf_to_dev(pf); set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); + set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state); oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); @@ -2692,8 +2786,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) /* If a reset cycle isn't already in progress, we set a bit in * pf->state so that the service task can start a reset/rebuild. - * We also make note of which reset happened so that peer - * devices/drivers can be informed. */ if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) { if (reset == ICE_RESET_CORER) @@ -2720,11 +2812,36 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) } } - if (oicr & PFINT_OICR_HMC_ERR_M) { - ena_mask &= ~PFINT_OICR_HMC_ERR_M; - dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n", - rd32(hw, PFHMC_ERRORINFO), - rd32(hw, PFHMC_ERRORDATA)); + if (oicr & PFINT_OICR_TSYN_TX_M) { + ena_mask &= ~PFINT_OICR_TSYN_TX_M; + ice_ptp_process_ts(pf); + } + + if (oicr & PFINT_OICR_TSYN_EVNT_M) { + u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx)); + + /* Save EVENTs from GTSYN register */ + pf->ptp.ext_ts_irq |= gltsyn_stat & (GLTSYN_STAT_EVENT0_M | + GLTSYN_STAT_EVENT1_M | + GLTSYN_STAT_EVENT2_M); + ena_mask &= ~PFINT_OICR_TSYN_EVNT_M; + kthread_queue_work(pf->ptp.kworker, &pf->ptp.extts_work); + } + +#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) + if (oicr & ICE_AUX_CRIT_ERR) { + struct iidc_event *event; + + ena_mask &= ~ICE_AUX_CRIT_ERR; + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (event) { + set_bit(IIDC_EVENT_CRIT_ERR, event->type); + /* report the entire OICR value to AUX driver */ + event->reg = oicr; + ice_send_event_to_aux(pf, event); + kfree(event); + } } /* Report any remaining unexpected interrupts */ @@ -2734,8 +2851,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) /* If a critical error is pending there is no choice but to * reset the device. */ - if (oicr & (PFINT_OICR_PE_CRITERR_M | - PFINT_OICR_PCI_EXCEPTION_M | + if (oicr & (PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_ECC_ERR_M)) { set_bit(ICE_PFR_REQ, pf->state); ice_service_task_schedule(pf); @@ -2763,6 +2879,9 @@ static void ice_dis_ctrlq_interrupts(struct ice_hw *hw) wr32(hw, PFINT_MBX_CTL, rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M); + wr32(hw, PFINT_SB_CTL, + rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M); + /* disable Control queue Interrupt causes */ wr32(hw, PFINT_OICR_CTL, rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M); @@ -2817,6 +2936,11 @@ static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx) PFINT_MBX_CTL_CAUSE_ENA_M); wr32(hw, PFINT_MBX_CTL, val); + /* This enables Sideband queue Interrupt causes */ + val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) | + PFINT_SB_CTL_CAUSE_ENA_M); + wr32(hw, PFINT_SB_CTL, val); + ice_flush(hw); } @@ -2986,7 +3110,6 @@ static void ice_set_netdev_features(struct net_device *netdev) */ static int ice_cfg_netdev(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; struct ice_netdev_priv *np; struct net_device *netdev; u8 mac_addr[ETH_ALEN]; @@ -3006,7 +3129,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) ice_set_ops(netdev); if (vsi->type == ICE_VSI_PF) { - SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf)); + SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back)); ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); ether_addr_copy(netdev->dev_addr, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); @@ -3280,6 +3403,9 @@ static void ice_deinit_pf(struct ice_pf *pf) bitmap_free(pf->avail_rxqs); pf->avail_rxqs = NULL; } + + if (pf->ptp.clock) + ptp_clock_unregister(pf->ptp.clock); } /** @@ -3290,6 +3416,12 @@ static void ice_set_pf_caps(struct ice_pf *pf) { struct ice_hw_func_caps *func_caps = &pf->hw.func_caps; + clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + clear_bit(ICE_FLAG_AUX_ENA, pf->flags); + if (func_caps->common_cap.rdma) { + set_bit(ICE_FLAG_RDMA_ENA, pf->flags); + set_bit(ICE_FLAG_AUX_ENA, pf->flags); + } clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); if (func_caps->common_cap.dcb) set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); @@ -3320,6 +3452,10 @@ static void ice_set_pf_caps(struct ice_pf *pf) func_caps->fd_fltr_best_effort); } + clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); + if (func_caps->common_cap.ieee_1588) + set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); + pf->max_pf_txqs = func_caps->common_cap.num_txq; pf->max_pf_rxqs = func_caps->common_cap.num_rxq; } @@ -3339,6 +3475,8 @@ static int ice_init_pf(struct ice_pf *pf) spin_lock_init(&pf->aq_wait_lock); init_waitqueue_head(&pf->aq_wait_queue); + init_waitqueue_head(&pf->reset_wait_queue); + /* setup service timer and periodic service task */ timer_setup(&pf->serv_tmr, ice_service_timer, 0); pf->serv_tmr_period = HZ; @@ -3369,11 +3507,12 @@ static int ice_init_pf(struct ice_pf *pf) */ static int ice_ena_msix_range(struct ice_pf *pf) { - int v_left, v_actual, v_other, v_budget = 0; + int num_cpus, v_left, v_actual, v_other, v_budget = 0; struct device *dev = ice_pf_to_dev(pf); int needed, err, i; v_left = pf->hw.func_caps.common_cap.num_msix_vectors; + num_cpus = num_online_cpus(); /* reserve for LAN miscellaneous handler */ needed = ICE_MIN_LAN_OICR_MSIX; @@ -3395,13 +3534,23 @@ static int ice_ena_msix_range(struct ice_pf *pf) v_other = v_budget; /* reserve vectors for LAN traffic */ - needed = min_t(int, num_online_cpus(), v_left); + needed = num_cpus; if (v_left < needed) goto no_hw_vecs_left_err; pf->num_lan_msix = needed; v_budget += needed; v_left -= needed; + /* reserve vectors for RDMA auxiliary driver */ + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + needed = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; + if (v_left < needed) + goto no_hw_vecs_left_err; + pf->num_rdma_msix = needed; + v_budget += needed; + v_left -= needed; + } + pf->msix_entries = devm_kcalloc(dev, v_budget, sizeof(*pf->msix_entries), GFP_KERNEL); if (!pf->msix_entries) { @@ -3431,16 +3580,46 @@ static int ice_ena_msix_range(struct ice_pf *pf) err = -ERANGE; goto msix_err; } else { - int v_traffic = v_actual - v_other; + int v_remain = v_actual - v_other; + int v_rdma = 0, v_min_rdma = 0; + + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + /* Need at least 1 interrupt in addition to + * AEQ MSIX + */ + v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; + v_min_rdma = ICE_MIN_RDMA_MSIX; + } if (v_actual == ICE_MIN_MSIX || - v_traffic < ICE_MIN_LAN_TXRX_MSIX) + v_remain < ICE_MIN_LAN_TXRX_MSIX + v_min_rdma) { + dev_warn(dev, "Not enough MSI-X vectors to support RDMA.\n"); + clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + + pf->num_rdma_msix = 0; pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; - else - pf->num_lan_msix = v_traffic; + } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || + (v_remain - v_rdma < v_rdma)) { + /* Support minimum RDMA and give remaining + * vectors to LAN MSIX + */ + pf->num_rdma_msix = v_min_rdma; + pf->num_lan_msix = v_remain - v_min_rdma; + } else { + /* Split remaining MSIX with RDMA after + * accounting for AEQ MSIX + */ + pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + + ICE_RDMA_NUM_AEQ_MSIX; + pf->num_lan_msix = v_remain - pf->num_rdma_msix; + } dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", pf->num_lan_msix); + + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", + pf->num_rdma_msix); } } @@ -3455,6 +3634,7 @@ no_hw_vecs_left_err: needed, v_left); err = -ERANGE; exit_err: + pf->num_rdma_msix = 0; pf->num_lan_msix = 0; return err; } @@ -4218,6 +4398,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_init_link_dflt_override(pf->hw.port_info); + ice_check_module_power(pf, pf->hw.port_info->phy.link_info.link_cfg_err); + /* if media available, initialize PHY settings */ if (pf->hw.port_info->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { @@ -4256,6 +4438,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) } /* initialize DDP driven features */ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_init(pf); /* Note: Flow director init failure is non-fatal to load */ if (ice_init_fdir(pf)) @@ -4282,8 +4466,29 @@ probe_done: /* ready to go, so clear down state bit */ clear_bit(ICE_DOWN, pf->state); + if (ice_is_aux_ena(pf)) { + pf->aux_idx = ida_alloc(&ice_aux_ida, GFP_KERNEL); + if (pf->aux_idx < 0) { + dev_err(dev, "Failed to allocate device ID for AUX driver\n"); + err = -ENOMEM; + goto err_netdev_reg; + } + + err = ice_init_rdma(pf); + if (err) { + dev_err(dev, "Failed to initialize RDMA: %d\n", err); + err = -EIO; + goto err_init_aux_unroll; + } + } else { + dev_warn(dev, "RDMA is not supported on this device\n"); + } + return 0; +err_init_aux_unroll: + pf->adev = NULL; + ida_free(&ice_aux_ida, pf->aux_idx); err_netdev_reg: err_send_version_unroll: ice_vsi_release_all(pf); @@ -4393,13 +4598,17 @@ static void ice_remove(struct pci_dev *pdev) ice_free_vfs(pf); } - set_bit(ICE_DOWN, pf->state); ice_service_task_stop(pf); ice_aq_cancel_waiting_tasks(pf); + ice_unplug_aux_dev(pf); + ida_free(&ice_aux_ida, pf->aux_idx); + set_bit(ICE_DOWN, pf->state); mutex_destroy(&(&pf->hw)->fdir_fltr_lock); ice_deinit_lag(pf); + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_release(pf); if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); @@ -4552,6 +4761,8 @@ static int __maybe_unused ice_suspend(struct device *dev) */ disabled = ice_service_task_stop(pf); + ice_unplug_aux_dev(pf); + /* Already suspended?, then there is nothing to do */ if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { if (!disabled) @@ -5284,6 +5495,7 @@ static void ice_tx_dim_work(struct work_struct *work) itr = tx_profile[dim->profile_ix].itr; intrl = tx_profile[dim->profile_ix].intrl; + ice_trace(tx_dim_work, q_vector, dim); ice_write_itr(rc, itr); ice_write_intrl(q_vector, intrl); @@ -5308,6 +5520,7 @@ static void ice_rx_dim_work(struct work_struct *work) itr = rx_profile[dim->profile_ix].itr; intrl = rx_profile[dim->profile_ix].intrl; + ice_trace(rx_dim_work, q_vector, dim); ice_write_itr(rc, itr); ice_write_intrl(q_vector, intrl); @@ -5451,7 +5664,6 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_ring **rings, static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; - struct ice_ring *ring; u64 pkts, bytes; int i; @@ -5475,7 +5687,8 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { - ring = READ_ONCE(vsi->rx_rings[i]); + struct ice_ring *ring = READ_ONCE(vsi->rx_rings[i]); + ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; @@ -6142,6 +6355,12 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_clear_pxe_mode(hw); + ret = ice_init_nvm(hw); + if (ret) { + dev_err(dev, "ice_init_nvm failed %s\n", ice_stat_str(ret)); + goto err_init_ctrlq; + } + ret = ice_get_caps(hw); if (ret) { dev_err(dev, "ice_get_caps failed %s\n", ice_stat_str(ret)); @@ -6183,6 +6402,13 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) ice_dcb_rebuild(pf); + /* If the PF previously had enabled PTP, PTP init needs to happen before + * the VSI rebuild. If not, this causes the PTP link status events to + * fail. + */ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_init(pf); + /* rebuild PF VSI */ err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF); if (err) { @@ -6222,6 +6448,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) /* if we get here, reset flow is successful */ clear_bit(ICE_RESET_FAILED, pf->state); + + ice_plug_aux_dev(pf); return; err_vsi_rebuild: @@ -6260,7 +6488,9 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct iidc_event *event; u8 count = 0; + int err = 0; if (new_mtu == (int)netdev->mtu) { netdev_warn(netdev, "MTU is already %u\n", netdev->mtu); @@ -6293,27 +6523,59 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) return -EBUSY; } + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return -ENOMEM; + + set_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type); + ice_send_event_to_aux(pf, event); + clear_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type); + netdev->mtu = (unsigned int)new_mtu; /* if VSI is up, bring it down and then back up */ if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { - int err; - err = ice_down(vsi); if (err) { netdev_err(netdev, "change MTU if_down err %d\n", err); - return err; + goto event_after; } err = ice_up(vsi); if (err) { netdev_err(netdev, "change MTU if_up err %d\n", err); - return err; + goto event_after; } } netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); - return 0; +event_after: + set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type); + ice_send_event_to_aux(pf, event); + kfree(event); + + return err; +} + +/** + * ice_do_ioctl - Access the hwtstamp interface + * @netdev: network interface device structure + * @ifr: interface request data + * @cmd: ioctl command + */ +static int ice_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + + switch (cmd) { + case SIOCGHWTSTAMP: + return ice_ptp_get_ts_config(pf, ifr); + case SIOCSHWTSTAMP: + return ice_ptp_set_ts_config(pf, ifr); + default: + return -EOPNOTSUPP; + } } /** @@ -6832,6 +7094,8 @@ int ice_open_internal(struct net_device *netdev) return -EIO; } + ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + /* Set PHY if there is media, otherwise, turn off PHY */ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); @@ -6965,6 +7229,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_set_tx_maxrate = ice_set_tx_maxrate, + .ndo_do_ioctl = ice_do_ioctl, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, .ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c new file mode 100644 index 000000000000..5d5207b56ca9 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -0,0 +1,1558 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021, Intel Corporation. */ + +#include "ice.h" +#include "ice_lib.h" + +#define E810_OUT_PROP_DELAY_NS 1 + +/** + * ice_set_tx_tstamp - Enable or disable Tx timestamping + * @pf: The PF pointer to search in + * @on: bool value for whether timestamps are enabled or disabled + */ +static void ice_set_tx_tstamp(struct ice_pf *pf, bool on) +{ + struct ice_vsi *vsi; + u32 val; + u16 i; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return; + + /* Set the timestamp enable flag for all the Tx rings */ + ice_for_each_rxq(vsi, i) { + if (!vsi->tx_rings[i]) + continue; + vsi->tx_rings[i]->ptp_tx = on; + } + + /* Configure the Tx timestamp interrupt */ + val = rd32(&pf->hw, PFINT_OICR_ENA); + if (on) + val |= PFINT_OICR_TSYN_TX_M; + else + val &= ~PFINT_OICR_TSYN_TX_M; + wr32(&pf->hw, PFINT_OICR_ENA, val); +} + +/** + * ice_set_rx_tstamp - Enable or disable Rx timestamping + * @pf: The PF pointer to search in + * @on: bool value for whether timestamps are enabled or disabled + */ +static void ice_set_rx_tstamp(struct ice_pf *pf, bool on) +{ + struct ice_vsi *vsi; + u16 i; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return; + + /* Set the timestamp flag for all the Rx rings */ + ice_for_each_rxq(vsi, i) { + if (!vsi->rx_rings[i]) + continue; + vsi->rx_rings[i]->ptp_rx = on; + } +} + +/** + * ice_ptp_cfg_timestamp - Configure timestamp for init/deinit + * @pf: Board private structure + * @ena: bool value to enable or disable time stamp + * + * This function will configure timestamping during PTP initialization + * and deinitialization + */ +static void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) +{ + ice_set_tx_tstamp(pf, ena); + ice_set_rx_tstamp(pf, ena); + + if (ena) { + pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_ALL; + pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_ON; + } else { + pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_OFF; + } +} + +/** + * ice_get_ptp_clock_index - Get the PTP clock index + * @pf: the PF pointer + * + * Determine the clock index of the PTP clock associated with this device. If + * this is the PF controlling the clock, just use the local access to the + * clock device pointer. + * + * Otherwise, read from the driver shared parameters to determine the clock + * index value. + * + * Returns: the index of the PTP clock associated with this device, or -1 if + * there is no associated clock. + */ +int ice_get_ptp_clock_index(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + enum ice_aqc_driver_params param_idx; + struct ice_hw *hw = &pf->hw; + u8 tmr_idx; + u32 value; + int err; + + /* Use the ptp_clock structure if we're the main PF */ + if (pf->ptp.clock) + return ptp_clock_index(pf->ptp.clock); + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; + if (!tmr_idx) + param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0; + else + param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1; + + err = ice_aq_get_driver_param(hw, param_idx, &value, NULL); + if (err) { + dev_err(dev, "Failed to read PTP clock index parameter, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + return -1; + } + + /* The PTP clock index is an integer, and will be between 0 and + * INT_MAX. The highest bit of the driver shared parameter is used to + * indicate whether or not the currently stored clock index is valid. + */ + if (!(value & PTP_SHARED_CLK_IDX_VALID)) + return -1; + + return value & ~PTP_SHARED_CLK_IDX_VALID; +} + +/** + * ice_set_ptp_clock_index - Set the PTP clock index + * @pf: the PF pointer + * + * Set the PTP clock index for this device into the shared driver parameters, + * so that other PFs associated with this device can read it. + * + * If the PF is unable to store the clock index, it will log an error, but + * will continue operating PTP. + */ +static void ice_set_ptp_clock_index(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + enum ice_aqc_driver_params param_idx; + struct ice_hw *hw = &pf->hw; + u8 tmr_idx; + u32 value; + int err; + + if (!pf->ptp.clock) + return; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; + if (!tmr_idx) + param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0; + else + param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1; + + value = (u32)ptp_clock_index(pf->ptp.clock); + if (value > INT_MAX) { + dev_err(dev, "PTP Clock index is too large to store\n"); + return; + } + value |= PTP_SHARED_CLK_IDX_VALID; + + err = ice_aq_set_driver_param(hw, param_idx, value, NULL); + if (err) { + dev_err(dev, "Failed to set PTP clock index parameter, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + } +} + +/** + * ice_clear_ptp_clock_index - Clear the PTP clock index + * @pf: the PF pointer + * + * Clear the PTP clock index for this device. Must be called when + * unregistering the PTP clock, in order to ensure other PFs stop reporting + * a clock object that no longer exists. + */ +static void ice_clear_ptp_clock_index(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + enum ice_aqc_driver_params param_idx; + struct ice_hw *hw = &pf->hw; + u8 tmr_idx; + int err; + + /* Do not clear the index if we don't own the timer */ + if (!hw->func_caps.ts_func_info.src_tmr_owned) + return; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; + if (!tmr_idx) + param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0; + else + param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1; + + err = ice_aq_set_driver_param(hw, param_idx, 0, NULL); + if (err) { + dev_dbg(dev, "Failed to clear PTP clock index parameter, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + } +} + +/** + * ice_ptp_read_src_clk_reg - Read the source clock register + * @pf: Board private structure + * @sts: Optional parameter for holding a pair of system timestamps from + * the system clock. Will be ignored if NULL is given. + */ +static u64 +ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) +{ + struct ice_hw *hw = &pf->hw; + u32 hi, lo, lo2; + u8 tmr_idx; + + tmr_idx = ice_get_ptp_src_clock_index(hw); + /* Read the system timestamp pre PHC read */ + ptp_read_system_prets(sts); + + lo = rd32(hw, GLTSYN_TIME_L(tmr_idx)); + + /* Read the system timestamp post PHC read */ + ptp_read_system_postts(sts); + + hi = rd32(hw, GLTSYN_TIME_H(tmr_idx)); + lo2 = rd32(hw, GLTSYN_TIME_L(tmr_idx)); + + if (lo2 < lo) { + /* if TIME_L rolled over read TIME_L again and update + * system timestamps + */ + ptp_read_system_prets(sts); + lo = rd32(hw, GLTSYN_TIME_L(tmr_idx)); + ptp_read_system_postts(sts); + hi = rd32(hw, GLTSYN_TIME_H(tmr_idx)); + } + + return ((u64)hi << 32) | lo; +} + +/** + * ice_ptp_update_cached_phctime - Update the cached PHC time values + * @pf: Board specific private structure + * + * This function updates the system time values which are cached in the PF + * structure and the Rx rings. + * + * This function must be called periodically to ensure that the cached value + * is never more than 2 seconds old. It must also be called whenever the PHC + * time has been changed. + */ +static void ice_ptp_update_cached_phctime(struct ice_pf *pf) +{ + u64 systime; + int i; + + /* Read the current PHC time */ + systime = ice_ptp_read_src_clk_reg(pf, NULL); + + /* Update the cached PHC time stored in the PF structure */ + WRITE_ONCE(pf->ptp.cached_phc_time, systime); + + ice_for_each_vsi(pf, i) { + struct ice_vsi *vsi = pf->vsi[i]; + int j; + + if (!vsi) + continue; + + if (vsi->type != ICE_VSI_PF) + continue; + + ice_for_each_rxq(vsi, j) { + if (!vsi->rx_rings[j]) + continue; + WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime); + } + } +} + +/** + * ice_ptp_extend_32b_ts - Convert a 32b nanoseconds timestamp to 64b + * @cached_phc_time: recently cached copy of PHC time + * @in_tstamp: Ingress/egress 32b nanoseconds timestamp value + * + * Hardware captures timestamps which contain only 32 bits of nominal + * nanoseconds, as opposed to the 64bit timestamps that the stack expects. + * Note that the captured timestamp values may be 40 bits, but the lower + * 8 bits are sub-nanoseconds and generally discarded. + * + * Extend the 32bit nanosecond timestamp using the following algorithm and + * assumptions: + * + * 1) have a recently cached copy of the PHC time + * 2) assume that the in_tstamp was captured 2^31 nanoseconds (~2.1 + * seconds) before or after the PHC time was captured. + * 3) calculate the delta between the cached time and the timestamp + * 4) if the delta is smaller than 2^31 nanoseconds, then the timestamp was + * captured after the PHC time. In this case, the full timestamp is just + * the cached PHC time plus the delta. + * 5) otherwise, if the delta is larger than 2^31 nanoseconds, then the + * timestamp was captured *before* the PHC time, i.e. because the PHC + * cache was updated after the timestamp was captured by hardware. In this + * case, the full timestamp is the cached time minus the inverse delta. + * + * This algorithm works even if the PHC time was updated after a Tx timestamp + * was requested, but before the Tx timestamp event was reported from + * hardware. + * + * This calculation primarily relies on keeping the cached PHC time up to + * date. If the timestamp was captured more than 2^31 nanoseconds after the + * PHC time, it is possible that the lower 32bits of PHC time have + * overflowed more than once, and we might generate an incorrect timestamp. + * + * This is prevented by (a) periodically updating the cached PHC time once + * a second, and (b) discarding any Tx timestamp packet if it has waited for + * a timestamp for more than one second. + */ +static u64 ice_ptp_extend_32b_ts(u64 cached_phc_time, u32 in_tstamp) +{ + u32 delta, phc_time_lo; + u64 ns; + + /* Extract the lower 32 bits of the PHC time */ + phc_time_lo = (u32)cached_phc_time; + + /* Calculate the delta between the lower 32bits of the cached PHC + * time and the in_tstamp value + */ + delta = (in_tstamp - phc_time_lo); + + /* Do not assume that the in_tstamp is always more recent than the + * cached PHC time. If the delta is large, it indicates that the + * in_tstamp was taken in the past, and should be converted + * forward. + */ + if (delta > (U32_MAX / 2)) { + /* reverse the delta calculation here */ + delta = (phc_time_lo - in_tstamp); + ns = cached_phc_time - delta; + } else { + ns = cached_phc_time + delta; + } + + return ns; +} + +/** + * ice_ptp_extend_40b_ts - Convert a 40b timestamp to 64b nanoseconds + * @pf: Board private structure + * @in_tstamp: Ingress/egress 40b timestamp value + * + * The Tx and Rx timestamps are 40 bits wide, including 32 bits of nominal + * nanoseconds, 7 bits of sub-nanoseconds, and a valid bit. + * + * *--------------------------------------------------------------* + * | 32 bits of nanoseconds | 7 high bits of sub ns underflow | v | + * *--------------------------------------------------------------* + * + * The low bit is an indicator of whether the timestamp is valid. The next + * 7 bits are a capture of the upper 7 bits of the sub-nanosecond underflow, + * and the remaining 32 bits are the lower 32 bits of the PHC timer. + * + * It is assumed that the caller verifies the timestamp is valid prior to + * calling this function. + * + * Extract the 32bit nominal nanoseconds and extend them. Use the cached PHC + * time stored in the device private PTP structure as the basis for timestamp + * extension. + * + * See ice_ptp_extend_32b_ts for a detailed explanation of the extension + * algorithm. + */ +static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) +{ + const u64 mask = GENMASK_ULL(31, 0); + + return ice_ptp_extend_32b_ts(pf->ptp.cached_phc_time, + (in_tstamp >> 8) & mask); +} + +/** + * ice_ptp_read_time - Read the time from the device + * @pf: Board private structure + * @ts: timespec structure to hold the current time value + * @sts: Optional parameter for holding a pair of system timestamps from + * the system clock. Will be ignored if NULL is given. + * + * This function reads the source clock registers and stores them in a timespec. + * However, since the registers are 64 bits of nanoseconds, we must convert the + * result to a timespec before we can return. + */ +static void +ice_ptp_read_time(struct ice_pf *pf, struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + u64 time_ns = ice_ptp_read_src_clk_reg(pf, sts); + + *ts = ns_to_timespec64(time_ns); +} + +/** + * ice_ptp_write_init - Set PHC time to provided value + * @pf: Board private structure + * @ts: timespec structure that holds the new time value + * + * Set the PHC time to the specified time provided in the timespec. + */ +static int ice_ptp_write_init(struct ice_pf *pf, struct timespec64 *ts) +{ + u64 ns = timespec64_to_ns(ts); + struct ice_hw *hw = &pf->hw; + + return ice_ptp_init_time(hw, ns); +} + +/** + * ice_ptp_write_adj - Adjust PHC clock time atomically + * @pf: Board private structure + * @adj: Adjustment in nanoseconds + * + * Perform an atomic adjustment of the PHC time by the specified number of + * nanoseconds. + */ +static int ice_ptp_write_adj(struct ice_pf *pf, s32 adj) +{ + struct ice_hw *hw = &pf->hw; + + return ice_ptp_adj_clock(hw, adj); +} + +/** + * ice_ptp_adjfine - Adjust clock increment rate + * @info: the driver's PTP info structure + * @scaled_ppm: Parts per million with 16-bit fractional field + * + * Adjust the frequency of the clock by the indicated scaled ppm from the + * base frequency. + */ +static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) +{ + struct ice_pf *pf = ptp_info_to_pf(info); + u64 freq, divisor = 1000000ULL; + struct ice_hw *hw = &pf->hw; + s64 incval, diff; + int neg_adj = 0; + int err; + + incval = ICE_PTP_NOMINAL_INCVAL_E810; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + + while ((u64)scaled_ppm > div_u64(U64_MAX, incval)) { + /* handle overflow by scaling down the scaled_ppm and + * the divisor, losing some precision + */ + scaled_ppm >>= 2; + divisor >>= 2; + } + + freq = (incval * (u64)scaled_ppm) >> 16; + diff = div_u64(freq, divisor); + + if (neg_adj) + incval -= diff; + else + incval += diff; + + err = ice_ptp_write_incval_locked(hw, incval); + if (err) { + dev_err(ice_pf_to_dev(pf), "PTP failed to set incval, err %d\n", + err); + return -EIO; + } + + return 0; +} + +/** + * ice_ptp_extts_work - Workqueue task function + * @work: external timestamp work structure + * + * Service for PTP external clock event + */ +static void ice_ptp_extts_work(struct kthread_work *work) +{ + struct ice_ptp *ptp = container_of(work, struct ice_ptp, extts_work); + struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp); + struct ptp_clock_event event; + struct ice_hw *hw = &pf->hw; + u8 chan, tmr_idx; + u32 hi, lo; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + /* Event time is captured by one of the two matched registers + * GLTSYN_EVNT_L: 32 LSB of sampled time event + * GLTSYN_EVNT_H: 32 MSB of sampled time event + * Event is defined in GLTSYN_EVNT_0 register + */ + for (chan = 0; chan < GLTSYN_EVNT_H_IDX_MAX; chan++) { + /* Check if channel is enabled */ + if (pf->ptp.ext_ts_irq & (1 << chan)) { + lo = rd32(hw, GLTSYN_EVNT_L(chan, tmr_idx)); + hi = rd32(hw, GLTSYN_EVNT_H(chan, tmr_idx)); + event.timestamp = (((u64)hi) << 32) | lo; + event.type = PTP_CLOCK_EXTTS; + event.index = chan; + + /* Fire event */ + ptp_clock_event(pf->ptp.clock, &event); + pf->ptp.ext_ts_irq &= ~(1 << chan); + } + } +} + +/** + * ice_ptp_cfg_extts - Configure EXTTS pin and channel + * @pf: Board private structure + * @ena: true to enable; false to disable + * @chan: GPIO channel (0-3) + * @gpio_pin: GPIO pin + * @extts_flags: request flags from the ptp_extts_request.flags + */ +static int +ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin, + unsigned int extts_flags) +{ + u32 func, aux_reg, gpio_reg, irq_reg; + struct ice_hw *hw = &pf->hw; + u8 tmr_idx; + + if (chan > (unsigned int)pf->ptp.info.n_ext_ts) + return -EINVAL; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + irq_reg = rd32(hw, PFINT_OICR_ENA); + + if (ena) { + /* Enable the interrupt */ + irq_reg |= PFINT_OICR_TSYN_EVNT_M; + aux_reg = GLTSYN_AUX_IN_0_INT_ENA_M; + +#define GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE BIT(0) +#define GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE BIT(1) + + /* set event level to requested edge */ + if (extts_flags & PTP_FALLING_EDGE) + aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE; + if (extts_flags & PTP_RISING_EDGE) + aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE; + + /* Write GPIO CTL reg. + * 0x1 is input sampled by EVENT register(channel) + * + num_in_channels * tmr_idx + */ + func = 1 + chan + (tmr_idx * 3); + gpio_reg = ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) & + GLGEN_GPIO_CTL_PIN_FUNC_M); + pf->ptp.ext_ts_chan |= (1 << chan); + } else { + /* clear the values we set to reset defaults */ + aux_reg = 0; + gpio_reg = 0; + pf->ptp.ext_ts_chan &= ~(1 << chan); + if (!pf->ptp.ext_ts_chan) + irq_reg &= ~PFINT_OICR_TSYN_EVNT_M; + } + + wr32(hw, PFINT_OICR_ENA, irq_reg); + wr32(hw, GLTSYN_AUX_IN(chan, tmr_idx), aux_reg); + wr32(hw, GLGEN_GPIO_CTL(gpio_pin), gpio_reg); + + return 0; +} + +/** + * ice_ptp_cfg_clkout - Configure clock to generate periodic wave + * @pf: Board private structure + * @chan: GPIO channel (0-3) + * @config: desired periodic clk configuration. NULL will disable channel + * @store: If set to true the values will be stored + * + * Configure the internal clock generator modules to generate the clock wave of + * specified period. + */ +static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan, + struct ice_perout_channel *config, bool store) +{ + u64 current_time, period, start_time, phase; + struct ice_hw *hw = &pf->hw; + u32 func, val, gpio_pin; + u8 tmr_idx; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + /* 0. Reset mode & out_en in AUX_OUT */ + wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), 0); + + /* If we're disabling the output, clear out CLKO and TGT and keep + * output level low + */ + if (!config || !config->ena) { + wr32(hw, GLTSYN_CLKO(chan, tmr_idx), 0); + wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), 0); + wr32(hw, GLTSYN_TGT_H(chan, tmr_idx), 0); + + val = GLGEN_GPIO_CTL_PIN_DIR_M; + gpio_pin = pf->ptp.perout_channels[chan].gpio_pin; + wr32(hw, GLGEN_GPIO_CTL(gpio_pin), val); + + /* Store the value if requested */ + if (store) + memset(&pf->ptp.perout_channels[chan], 0, + sizeof(struct ice_perout_channel)); + + return 0; + } + period = config->period; + start_time = config->start_time; + div64_u64_rem(start_time, period, &phase); + gpio_pin = config->gpio_pin; + + /* 1. Write clkout with half of required period value */ + if (period & 0x1) { + dev_err(ice_pf_to_dev(pf), "CLK Period must be an even value\n"); + goto err; + } + + period >>= 1; + + /* For proper operation, the GLTSYN_CLKO must be larger than clock tick + */ +#define MIN_PULSE 3 + if (period <= MIN_PULSE || period > U32_MAX) { + dev_err(ice_pf_to_dev(pf), "CLK Period must be > %d && < 2^33", + MIN_PULSE * 2); + goto err; + } + + wr32(hw, GLTSYN_CLKO(chan, tmr_idx), lower_32_bits(period)); + + /* Allow time for programming before start_time is hit */ + current_time = ice_ptp_read_src_clk_reg(pf, NULL); + + /* if start time is in the past start the timer at the nearest second + * maintaining phase + */ + if (start_time < current_time) + start_time = div64_u64(current_time + NSEC_PER_MSEC - 1, + NSEC_PER_SEC) * NSEC_PER_SEC + phase; + + start_time -= E810_OUT_PROP_DELAY_NS; + + /* 2. Write TARGET time */ + wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), lower_32_bits(start_time)); + wr32(hw, GLTSYN_TGT_H(chan, tmr_idx), upper_32_bits(start_time)); + + /* 3. Write AUX_OUT register */ + val = GLTSYN_AUX_OUT_0_OUT_ENA_M | GLTSYN_AUX_OUT_0_OUTMOD_M; + wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), val); + + /* 4. write GPIO CTL reg */ + func = 8 + chan + (tmr_idx * 4); + val = GLGEN_GPIO_CTL_PIN_DIR_M | + ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) & GLGEN_GPIO_CTL_PIN_FUNC_M); + wr32(hw, GLGEN_GPIO_CTL(gpio_pin), val); + + /* Store the value if requested */ + if (store) { + memcpy(&pf->ptp.perout_channels[chan], config, + sizeof(struct ice_perout_channel)); + pf->ptp.perout_channels[chan].start_time = phase; + } + + return 0; +err: + dev_err(ice_pf_to_dev(pf), "PTP failed to cfg per_clk\n"); + return -EFAULT; +} + +/** + * ice_ptp_gpio_enable_e810 - Enable/disable ancillary features of PHC + * @info: the driver's PTP info structure + * @rq: The requested feature to change + * @on: Enable/disable flag + */ +static int +ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, + struct ptp_clock_request *rq, int on) +{ + struct ice_pf *pf = ptp_info_to_pf(info); + struct ice_perout_channel clk_cfg = {0}; + unsigned int chan; + u32 gpio_pin; + int err; + + switch (rq->type) { + case PTP_CLK_REQ_PEROUT: + chan = rq->perout.index; + if (chan == PPS_CLK_GEN_CHAN) + clk_cfg.gpio_pin = PPS_PIN_INDEX; + else + clk_cfg.gpio_pin = chan; + + clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) + + rq->perout.period.nsec); + clk_cfg.start_time = ((rq->perout.start.sec * NSEC_PER_SEC) + + rq->perout.start.nsec); + clk_cfg.ena = !!on; + + err = ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true); + break; + case PTP_CLK_REQ_EXTTS: + chan = rq->extts.index; + gpio_pin = chan; + + err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin, + rq->extts.flags); + break; + default: + return -EOPNOTSUPP; + } + + return err; +} + +/** + * ice_ptp_gettimex64 - Get the time of the clock + * @info: the driver's PTP info structure + * @ts: timespec64 structure to hold the current time value + * @sts: Optional parameter for holding a pair of system timestamps from + * the system clock. Will be ignored if NULL is given. + * + * Read the device clock and return the correct value on ns, after converting it + * into a timespec struct. + */ +static int +ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct ice_pf *pf = ptp_info_to_pf(info); + struct ice_hw *hw = &pf->hw; + + if (!ice_ptp_lock(hw)) { + dev_err(ice_pf_to_dev(pf), "PTP failed to get time\n"); + return -EBUSY; + } + + ice_ptp_read_time(pf, ts, sts); + ice_ptp_unlock(hw); + + return 0; +} + +/** + * ice_ptp_settime64 - Set the time of the clock + * @info: the driver's PTP info structure + * @ts: timespec64 structure that holds the new time value + * + * Set the device clock to the user input value. The conversion from timespec + * to ns happens in the write function. + */ +static int +ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) +{ + struct ice_pf *pf = ptp_info_to_pf(info); + struct timespec64 ts64 = *ts; + struct ice_hw *hw = &pf->hw; + int err; + + if (!ice_ptp_lock(hw)) { + err = -EBUSY; + goto exit; + } + + err = ice_ptp_write_init(pf, &ts64); + ice_ptp_unlock(hw); + + if (!err) + ice_ptp_update_cached_phctime(pf); + +exit: + if (err) { + dev_err(ice_pf_to_dev(pf), "PTP failed to set time %d\n", err); + return err; + } + + return 0; +} + +/** + * ice_ptp_adjtime_nonatomic - Do a non-atomic clock adjustment + * @info: the driver's PTP info structure + * @delta: Offset in nanoseconds to adjust the time by + */ +static int ice_ptp_adjtime_nonatomic(struct ptp_clock_info *info, s64 delta) +{ + struct timespec64 now, then; + + then = ns_to_timespec64(delta); + ice_ptp_gettimex64(info, &now, NULL); + now = timespec64_add(now, then); + + return ice_ptp_settime64(info, (const struct timespec64 *)&now); +} + +/** + * ice_ptp_adjtime - Adjust the time of the clock by the indicated delta + * @info: the driver's PTP info structure + * @delta: Offset in nanoseconds to adjust the time by + */ +static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta) +{ + struct ice_pf *pf = ptp_info_to_pf(info); + struct ice_hw *hw = &pf->hw; + struct device *dev; + int err; + + dev = ice_pf_to_dev(pf); + + /* Hardware only supports atomic adjustments using signed 32-bit + * integers. For any adjustment outside this range, perform + * a non-atomic get->adjust->set flow. + */ + if (delta > S32_MAX || delta < S32_MIN) { + dev_dbg(dev, "delta = %lld, adjtime non-atomic\n", delta); + return ice_ptp_adjtime_nonatomic(info, delta); + } + + if (!ice_ptp_lock(hw)) { + dev_err(dev, "PTP failed to acquire semaphore in adjtime\n"); + return -EBUSY; + } + + err = ice_ptp_write_adj(pf, delta); + + ice_ptp_unlock(hw); + + if (err) { + dev_err(dev, "PTP failed to adjust time, err %d\n", err); + return err; + } + + ice_ptp_update_cached_phctime(pf); + + return 0; +} + +/** + * ice_ptp_get_ts_config - ioctl interface to read the timestamping config + * @pf: Board private structure + * @ifr: ioctl data + * + * Copy the timestamping config to user buffer + */ +int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr) +{ + struct hwtstamp_config *config; + + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return -EIO; + + config = &pf->ptp.tstamp_config; + + return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? + -EFAULT : 0; +} + +/** + * ice_ptp_set_timestamp_mode - Setup driver for requested timestamp mode + * @pf: Board private structure + * @config: hwtstamp settings requested or saved + */ +static int +ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) +{ + /* Reserved for future extensions. */ + if (config->flags) + return -EINVAL; + + switch (config->tx_type) { + case HWTSTAMP_TX_OFF: + ice_set_tx_tstamp(pf, false); + break; + case HWTSTAMP_TX_ON: + ice_set_tx_tstamp(pf, true); + break; + default: + return -ERANGE; + } + + switch (config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + ice_set_rx_tstamp(pf, false); + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + case HWTSTAMP_FILTER_ALL: + config->rx_filter = HWTSTAMP_FILTER_ALL; + ice_set_rx_tstamp(pf, true); + break; + default: + return -ERANGE; + } + + return 0; +} + +/** + * ice_ptp_set_ts_config - ioctl interface to control the timestamping + * @pf: Board private structure + * @ifr: ioctl data + * + * Get the user config and store it + */ +int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return -EAGAIN; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = ice_ptp_set_timestamp_mode(pf, &config); + if (err) + return err; + + /* Save these settings for future reference */ + pf->ptp.tstamp_config = config; + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +/** + * ice_ptp_rx_hwtstamp - Check for an Rx timestamp + * @rx_ring: Ring to get the VSI info + * @rx_desc: Receive descriptor + * @skb: Particular skb to send timestamp with + * + * The driver receives a notification in the receive descriptor with timestamp. + * The timestamp is in ns, so we must convert the result first. + */ +void +ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) +{ + u32 ts_high; + u64 ts_ns; + + /* Populate timesync data into skb */ + if (rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID) { + struct skb_shared_hwtstamps *hwtstamps; + + /* Use ice_ptp_extend_32b_ts directly, using the ring-specific + * cached PHC value, rather than accessing the PF. This also + * allows us to simply pass the upper 32bits of nanoseconds + * directly. Calling ice_ptp_extend_40b_ts is unnecessary as + * it would just discard these bits itself. + */ + ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high); + ts_ns = ice_ptp_extend_32b_ts(rx_ring->cached_phctime, ts_high); + + hwtstamps = skb_hwtstamps(skb); + memset(hwtstamps, 0, sizeof(*hwtstamps)); + hwtstamps->hwtstamp = ns_to_ktime(ts_ns); + } +} + +/** + * ice_ptp_setup_pins_e810 - Setup PTP pins in sysfs + * @info: PTP clock capabilities + */ +static void ice_ptp_setup_pins_e810(struct ptp_clock_info *info) +{ + info->n_per_out = E810_N_PER_OUT; + info->n_ext_ts = E810_N_EXT_TS; +} + +/** + * ice_ptp_set_funcs_e810 - Set specialized functions for E810 support + * @pf: Board private structure + * @info: PTP info to fill + * + * Assign functions to the PTP capabiltiies structure for E810 devices. + * Functions which operate across all device families should be set directly + * in ice_ptp_set_caps. Only add functions here which are distinct for e810 + * devices. + */ +static void +ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info) +{ + info->enable = ice_ptp_gpio_enable_e810; + + ice_ptp_setup_pins_e810(info); +} + +/** + * ice_ptp_set_caps - Set PTP capabilities + * @pf: Board private structure + */ +static void ice_ptp_set_caps(struct ice_pf *pf) +{ + struct ptp_clock_info *info = &pf->ptp.info; + struct device *dev = ice_pf_to_dev(pf); + + snprintf(info->name, sizeof(info->name) - 1, "%s-%s-clk", + dev_driver_string(dev), dev_name(dev)); + info->owner = THIS_MODULE; + info->max_adj = 999999999; + info->adjtime = ice_ptp_adjtime; + info->adjfine = ice_ptp_adjfine; + info->gettimex64 = ice_ptp_gettimex64; + info->settime64 = ice_ptp_settime64; + + ice_ptp_set_funcs_e810(pf, info); +} + +/** + * ice_ptp_create_clock - Create PTP clock device for userspace + * @pf: Board private structure + * + * This function creates a new PTP clock device. It only creates one if we + * don't already have one. Will return error if it can't create one, but success + * if we already have a device. Should be used by ice_ptp_init to create clock + * initially, and prevent global resets from creating new clock devices. + */ +static long ice_ptp_create_clock(struct ice_pf *pf) +{ + struct ptp_clock_info *info; + struct ptp_clock *clock; + struct device *dev; + + /* No need to create a clock device if we already have one */ + if (pf->ptp.clock) + return 0; + + ice_ptp_set_caps(pf); + + info = &pf->ptp.info; + dev = ice_pf_to_dev(pf); + + /* Allocate memory for kernel pins interface */ + if (info->n_pins) { + info->pin_config = devm_kcalloc(dev, info->n_pins, + sizeof(*info->pin_config), + GFP_KERNEL); + if (!info->pin_config) { + info->n_pins = 0; + return -ENOMEM; + } + } + + /* Attempt to register the clock before enabling the hardware. */ + clock = ptp_clock_register(info, dev); + if (IS_ERR(clock)) + return PTR_ERR(clock); + + pf->ptp.clock = clock; + + return 0; +} + +/** + * ice_ptp_tx_tstamp_work - Process Tx timestamps for a port + * @work: pointer to the kthread_work struct + * + * Process timestamps captured by the PHY associated with this port. To do + * this, loop over each index with a waiting skb. + * + * If a given index has a valid timestamp, perform the following steps: + * + * 1) copy the timestamp out of the PHY register + * 4) clear the timestamp valid bit in the PHY register + * 5) unlock the index by clearing the associated in_use bit. + * 2) extend the 40b timestamp value to get a 64bit timestamp + * 3) send that timestamp to the stack + * + * After looping, if we still have waiting SKBs, then re-queue the work. This + * may cause us effectively poll even when not strictly necessary. We do this + * because it's possible a new timestamp was requested around the same time as + * the interrupt. In some cases hardware might not interrupt us again when the + * timestamp is captured. + * + * Note that we only take the tracking lock when clearing the bit and when + * checking if we need to re-queue this task. The only place where bits can be + * set is the hard xmit routine where an SKB has a request flag set. The only + * places where we clear bits are this work function, or the periodic cleanup + * thread. If the cleanup thread clears a bit we're processing we catch it + * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread + * starts a new timestamp, we might not begin processing it right away but we + * will notice it at the end when we re-queue the work item. If a Tx thread + * starts a new timestamp just after this function exits without re-queuing, + * the interrupt when the timestamp finishes should trigger. Avoiding holding + * the lock for the entire function is important in order to ensure that Tx + * threads do not get blocked while waiting for the lock. + */ +static void ice_ptp_tx_tstamp_work(struct kthread_work *work) +{ + struct ice_ptp_port *ptp_port; + struct ice_ptp_tx *tx; + struct ice_pf *pf; + struct ice_hw *hw; + u8 idx; + + tx = container_of(work, struct ice_ptp_tx, work); + if (!tx->init) + return; + + ptp_port = container_of(tx, struct ice_ptp_port, tx); + pf = ptp_port_to_pf(ptp_port); + hw = &pf->hw; + + for_each_set_bit(idx, tx->in_use, tx->len) { + struct skb_shared_hwtstamps shhwtstamps = {}; + u8 phy_idx = idx + tx->quad_offset; + u64 raw_tstamp, tstamp; + struct sk_buff *skb; + int err; + + err = ice_read_phy_tstamp(hw, tx->quad, phy_idx, + &raw_tstamp); + if (err) + continue; + + /* Check if the timestamp is valid */ + if (!(raw_tstamp & ICE_PTP_TS_VALID)) + continue; + + /* clear the timestamp register, so that it won't show valid + * again when re-used. + */ + ice_clear_phy_tstamp(hw, tx->quad, phy_idx); + + /* The timestamp is valid, so we'll go ahead and clear this + * index and then send the timestamp up to the stack. + */ + spin_lock(&tx->lock); + clear_bit(idx, tx->in_use); + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; + spin_unlock(&tx->lock); + + /* it's (unlikely but) possible we raced with the cleanup + * thread for discarding old timestamp requests. + */ + if (!skb) + continue; + + /* Extend the timestamp using cached PHC time */ + tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp); + shhwtstamps.hwtstamp = ns_to_ktime(tstamp); + + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); + } + + /* Check if we still have work to do. If so, re-queue this task to + * poll for remaining timestamps. + */ + spin_lock(&tx->lock); + if (!bitmap_empty(tx->in_use, tx->len)) + kthread_queue_work(pf->ptp.kworker, &tx->work); + spin_unlock(&tx->lock); +} + +/** + * ice_ptp_request_ts - Request an available Tx timestamp index + * @tx: the PTP Tx timestamp tracker to request from + * @skb: the SKB to associate with this timestamp request + */ +s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) +{ + u8 idx; + + /* Check if this tracker is initialized */ + if (!tx->init) + return -1; + + spin_lock(&tx->lock); + /* Find and set the first available index */ + idx = find_first_zero_bit(tx->in_use, tx->len); + if (idx < tx->len) { + /* We got a valid index that no other thread could have set. Store + * a reference to the skb and the start time to allow discarding old + * requests. + */ + set_bit(idx, tx->in_use); + tx->tstamps[idx].start = jiffies; + tx->tstamps[idx].skb = skb_get(skb); + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + } + + spin_unlock(&tx->lock); + + /* return the appropriate PHY timestamp register index, -1 if no + * indexes were available. + */ + if (idx >= tx->len) + return -1; + else + return idx + tx->quad_offset; +} + +/** + * ice_ptp_process_ts - Spawn kthread work to handle timestamps + * @pf: Board private structure + * + * Queue work required to process the PTP Tx timestamps outside of interrupt + * context. + */ +void ice_ptp_process_ts(struct ice_pf *pf) +{ + if (pf->ptp.port.tx.init) + kthread_queue_work(pf->ptp.kworker, &pf->ptp.port.tx.work); +} + +/** + * ice_ptp_alloc_tx_tracker - Initialize tracking for Tx timestamps + * @tx: Tx tracking structure to initialize + * + * Assumes that the length has already been initialized. Do not call directly, + * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead. + */ +static int +ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) +{ + tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL); + if (!tx->tstamps) + return -ENOMEM; + + tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL); + if (!tx->in_use) { + kfree(tx->tstamps); + tx->tstamps = NULL; + return -ENOMEM; + } + + spin_lock_init(&tx->lock); + kthread_init_work(&tx->work, ice_ptp_tx_tstamp_work); + + tx->init = 1; + + return 0; +} + +/** + * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker + * @pf: Board private structure + * @tx: the tracker to flush + */ +static void +ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) +{ + u8 idx; + + for (idx = 0; idx < tx->len; idx++) { + u8 phy_idx = idx + tx->quad_offset; + + /* Clear any potential residual timestamp in the PHY block */ + if (!pf->hw.reset_ongoing) + ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx); + + if (tx->tstamps[idx].skb) { + dev_kfree_skb_any(tx->tstamps[idx].skb); + tx->tstamps[idx].skb = NULL; + } + } +} + +/** + * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker + * @pf: Board private structure + * @tx: Tx tracking structure to release + * + * Free memory associated with the Tx timestamp tracker. + */ +static void +ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) +{ + tx->init = 0; + + kthread_cancel_work_sync(&tx->work); + + ice_ptp_flush_tx_tracker(pf, tx); + + kfree(tx->tstamps); + tx->tstamps = NULL; + + kfree(tx->in_use); + tx->in_use = NULL; + + tx->len = 0; +} + +/** + * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps + * @pf: Board private structure + * @tx: the Tx tracking structure to initialize + * + * Initialize the Tx timestamp tracker for this PF. For E810 devices, each + * port has its own block of timestamps, independent of the other ports. + */ +static int +ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) +{ + tx->quad = pf->hw.port_info->lport; + tx->quad_offset = 0; + tx->len = INDEX_PER_QUAD; + + return ice_ptp_alloc_tx_tracker(tx); +} + +/** + * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped + * @tx: PTP Tx tracker to clean up + * + * Loop through the Tx timestamp requests and see if any of them have been + * waiting for a long time. Discard any SKBs that have been waiting for more + * than 2 seconds. This is long enough to be reasonably sure that the + * timestamp will never be captured. This might happen if the packet gets + * discarded before it reaches the PHY timestamping block. + */ +static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx) +{ + u8 idx; + + if (!tx->init) + return; + + for_each_set_bit(idx, tx->in_use, tx->len) { + struct sk_buff *skb; + + /* Check if this SKB has been waiting for too long */ + if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ)) + continue; + + spin_lock(&tx->lock); + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; + clear_bit(idx, tx->in_use); + spin_unlock(&tx->lock); + + /* Free the SKB after we've cleared the bit */ + dev_kfree_skb_any(skb); + } +} + +static void ice_ptp_periodic_work(struct kthread_work *work) +{ + struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work); + struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp); + + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return; + + ice_ptp_update_cached_phctime(pf); + + ice_ptp_tx_tstamp_cleanup(&pf->ptp.port.tx); + + /* Run twice a second */ + kthread_queue_delayed_work(ptp->kworker, &ptp->work, + msecs_to_jiffies(500)); +} + +/** + * ice_ptp_init_owner - Initialize PTP_1588_CLOCK device + * @pf: Board private structure + * + * Setup and initialize a PTP clock device that represents the device hardware + * clock. Save the clock index for other functions connected to the same + * hardware resource. + */ +static int ice_ptp_init_owner(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + struct timespec64 ts; + u8 src_idx; + int err; + + wr32(hw, GLTSYN_SYNC_DLAY, 0); + + /* Clear some HW residue and enable source clock */ + src_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + /* Enable source clocks */ + wr32(hw, GLTSYN_ENA(src_idx), GLTSYN_ENA_TSYN_ENA_M); + + /* Enable PHY time sync */ + err = ice_ptp_init_phy_e810(hw); + if (err) + goto err_exit; + + /* Clear event status indications for auxiliary pins */ + (void)rd32(hw, GLTSYN_STAT(src_idx)); + + /* Acquire the global hardware lock */ + if (!ice_ptp_lock(hw)) { + err = -EBUSY; + goto err_exit; + } + + /* Write the increment time value to PHY and LAN */ + err = ice_ptp_write_incval(hw, ICE_PTP_NOMINAL_INCVAL_E810); + if (err) { + ice_ptp_unlock(hw); + goto err_exit; + } + + ts = ktime_to_timespec64(ktime_get_real()); + /* Write the initial Time value to PHY and LAN */ + err = ice_ptp_write_init(pf, &ts); + if (err) { + ice_ptp_unlock(hw); + goto err_exit; + } + + /* Release the global hardware lock */ + ice_ptp_unlock(hw); + + /* Ensure we have a clock device */ + err = ice_ptp_create_clock(pf); + if (err) + goto err_clk; + + /* Store the PTP clock index for other PFs */ + ice_set_ptp_clock_index(pf); + + return 0; + +err_clk: + pf->ptp.clock = NULL; +err_exit: + dev_err(dev, "PTP failed to register clock, err %d\n", err); + + return err; +} + +/** + * ice_ptp_init - Initialize the PTP support after device probe or reset + * @pf: Board private structure + * + * This function sets device up for PTP support. The first time it is run, it + * will create a clock device. It does not create a clock device if one + * already exists. It also reconfigures the device after a reset. + */ +void ice_ptp_init(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct kthread_worker *kworker; + struct ice_hw *hw = &pf->hw; + int err; + + /* PTP is currently only supported on E810 devices */ + if (!ice_is_e810(hw)) + return; + + /* Check if this PF owns the source timer */ + if (hw->func_caps.ts_func_info.src_tmr_owned) { + err = ice_ptp_init_owner(pf); + if (err) + return; + } + + /* Disable timestamping for both Tx and Rx */ + ice_ptp_cfg_timestamp(pf, false); + + /* Initialize the PTP port Tx timestamp tracker */ + ice_ptp_init_tx_e810(pf, &pf->ptp.port.tx); + + /* Initialize work functions */ + kthread_init_delayed_work(&pf->ptp.work, ice_ptp_periodic_work); + kthread_init_work(&pf->ptp.extts_work, ice_ptp_extts_work); + + /* Allocate a kworker for handling work required for the ports + * connected to the PTP hardware clock. + */ + kworker = kthread_create_worker(0, "ice-ptp-%s", dev_name(dev)); + if (IS_ERR(kworker)) { + err = PTR_ERR(kworker); + goto err_kworker; + } + pf->ptp.kworker = kworker; + + set_bit(ICE_FLAG_PTP, pf->flags); + + /* Start periodic work going */ + kthread_queue_delayed_work(pf->ptp.kworker, &pf->ptp.work, 0); + + dev_info(dev, "PTP init successful\n"); + return; + +err_kworker: + /* If we registered a PTP clock, release it */ + if (pf->ptp.clock) { + ptp_clock_unregister(pf->ptp.clock); + pf->ptp.clock = NULL; + } + dev_err(dev, "PTP failed %d\n", err); +} + +/** + * ice_ptp_release - Disable the driver/HW support and unregister the clock + * @pf: Board private structure + * + * This function handles the cleanup work required from the initialization by + * clearing out the important information and unregistering the clock + */ +void ice_ptp_release(struct ice_pf *pf) +{ + /* Disable timestamping for both Tx and Rx */ + ice_ptp_cfg_timestamp(pf, false); + + ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx); + + clear_bit(ICE_FLAG_PTP, pf->flags); + + kthread_cancel_delayed_work_sync(&pf->ptp.work); + + if (pf->ptp.kworker) { + kthread_destroy_worker(pf->ptp.kworker); + pf->ptp.kworker = NULL; + } + + if (!pf->ptp.clock) + return; + + ice_clear_ptp_clock_index(pf); + ptp_clock_unregister(pf->ptp.clock); + pf->ptp.clock = NULL; + + dev_info(ice_pf_to_dev(pf), "Removed PTP clock\n"); +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h new file mode 100644 index 000000000000..e1c787bd5b96 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021, Intel Corporation. */ + +#ifndef _ICE_PTP_H_ +#define _ICE_PTP_H_ + +#include <linux/ptp_clock_kernel.h> +#include <linux/kthread.h> + +#include "ice_ptp_hw.h" + +enum ice_ptp_pin { + GPIO_20 = 0, + GPIO_21, + GPIO_22, + GPIO_23, + NUM_ICE_PTP_PIN +}; + +struct ice_perout_channel { + bool ena; + u32 gpio_pin; + u64 period; + u64 start_time; +}; + +/* The ice hardware captures Tx hardware timestamps in the PHY. The timestamp + * is stored in a buffer of registers. Depending on the specific hardware, + * this buffer might be shared across multiple PHY ports. + * + * On transmit of a packet to be timestamped, software is responsible for + * selecting an open index. Hardware makes no attempt to lock or prevent + * re-use of an index for multiple packets. + * + * To handle this, timestamp indexes must be tracked by software to ensure + * that an index is not re-used for multiple transmitted packets. The + * structures and functions declared in this file track the available Tx + * register indexes, as well as provide storage for the SKB pointers. + * + * To allow multiple ports to access the shared register block independently, + * the blocks are split up so that indexes are assigned to each port based on + * hardware logical port number. + */ + +/** + * struct ice_tx_tstamp - Tracking for a single Tx timestamp + * @skb: pointer to the SKB for this timestamp request + * @start: jiffies when the timestamp was first requested + * + * This structure tracks a single timestamp request. The SKB pointer is + * provided when initiating a request. The start time is used to ensure that + * we discard old requests that were not fulfilled within a 2 second time + * window. + */ +struct ice_tx_tstamp { + struct sk_buff *skb; + unsigned long start; +}; + +/** + * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port + * @work: work function to handle processing of Tx timestamps + * @lock: lock to prevent concurrent write to in_use bitmap + * @tstamps: array of len to store outstanding requests + * @in_use: bitmap of len to indicate which slots are in use + * @quad: which quad the timestamps are captured in + * @quad_offset: offset into timestamp block of the quad to get the real index + * @len: length of the tstamps and in_use fields. + * @init: if true, the tracker is initialized; + */ +struct ice_ptp_tx { + struct kthread_work work; + spinlock_t lock; /* lock protecting in_use bitmap */ + struct ice_tx_tstamp *tstamps; + unsigned long *in_use; + u8 quad; + u8 quad_offset; + u8 len; + u8 init; +}; + +/* Quad and port information for initializing timestamp blocks */ +#define INDEX_PER_QUAD 64 +#define INDEX_PER_PORT (INDEX_PER_QUAD / ICE_PORTS_PER_QUAD) + +/** + * struct ice_ptp_port - data used to initialize an external port for PTP + * + * This structure contains PTP data related to the external ports. Currently + * it is used for tracking the Tx timestamps of a port. In the future this + * structure will also hold information for the E822 port initialization + * logic. + * + * @tx: Tx timestamp tracking for this port + */ +struct ice_ptp_port { + struct ice_ptp_tx tx; +}; + +#define GLTSYN_TGT_H_IDX_MAX 4 + +/** + * struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK + * @port: data for the PHY port initialization procedure + * @work: delayed work function for periodic tasks + * @extts_work: work function for handling external Tx timestamps + * @cached_phc_time: a cached copy of the PHC time for timestamp extension + * @ext_ts_chan: the external timestamp channel in use + * @ext_ts_irq: the external timestamp IRQ in use + * @kworker: kwork thread for handling periodic work + * @perout_channels: periodic output data + * @info: structure defining PTP hardware capabilities + * @clock: pointer to registered PTP clock device + * @tstamp_config: hardware timestamping configuration + */ +struct ice_ptp { + struct ice_ptp_port port; + struct kthread_delayed_work work; + struct kthread_work extts_work; + u64 cached_phc_time; + u8 ext_ts_chan; + u8 ext_ts_irq; + struct kthread_worker *kworker; + struct ice_perout_channel perout_channels[GLTSYN_TGT_H_IDX_MAX]; + struct ptp_clock_info info; + struct ptp_clock *clock; + struct hwtstamp_config tstamp_config; +}; + +#define __ptp_port_to_ptp(p) \ + container_of((p), struct ice_ptp, port) +#define ptp_port_to_pf(p) \ + container_of(__ptp_port_to_ptp((p)), struct ice_pf, ptp) + +#define __ptp_info_to_ptp(i) \ + container_of((i), struct ice_ptp, info) +#define ptp_info_to_pf(i) \ + container_of(__ptp_info_to_ptp((i)), struct ice_pf, ptp) + +#define PTP_SHARED_CLK_IDX_VALID BIT(31) +#define ICE_PTP_TS_VALID BIT(0) + +/* Per-channel register definitions */ +#define GLTSYN_AUX_OUT(_chan, _idx) (GLTSYN_AUX_OUT_0(_idx) + ((_chan) * 8)) +#define GLTSYN_AUX_IN(_chan, _idx) (GLTSYN_AUX_IN_0(_idx) + ((_chan) * 8)) +#define GLTSYN_CLKO(_chan, _idx) (GLTSYN_CLKO_0(_idx) + ((_chan) * 8)) +#define GLTSYN_TGT_L(_chan, _idx) (GLTSYN_TGT_L_0(_idx) + ((_chan) * 16)) +#define GLTSYN_TGT_H(_chan, _idx) (GLTSYN_TGT_H_0(_idx) + ((_chan) * 16)) +#define GLTSYN_EVNT_L(_chan, _idx) (GLTSYN_EVNT_L_0(_idx) + ((_chan) * 16)) +#define GLTSYN_EVNT_H(_chan, _idx) (GLTSYN_EVNT_H_0(_idx) + ((_chan) * 16)) +#define GLTSYN_EVNT_H_IDX_MAX 3 + +/* Pin definitions for PTP PPS out */ +#define PPS_CLK_GEN_CHAN 3 +#define PPS_CLK_SRC_CHAN 2 +#define PPS_PIN_INDEX 5 +#define TIME_SYNC_PIN_INDEX 4 +#define E810_N_EXT_TS 3 +#define E810_N_PER_OUT 4 + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +struct ice_pf; +int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr); +int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr); +int ice_get_ptp_clock_index(struct ice_pf *pf); + +s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); +void ice_ptp_process_ts(struct ice_pf *pf); + +void +ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb); +void ice_ptp_init(struct ice_pf *pf); +void ice_ptp_release(struct ice_pf *pf); +#else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ +static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) +{ + return -EOPNOTSUPP; +} + +static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr) +{ + return -EOPNOTSUPP; +} + +static inline int ice_get_ptp_clock_index(struct ice_pf *pf) +{ + return -1; +} + +static inline s8 +ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) +{ + return -1; +} + +static inline void ice_ptp_process_ts(struct ice_pf *pf) { } +static inline void +ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { } +static inline void ice_ptp_init(struct ice_pf *pf) { } +static inline void ice_ptp_release(struct ice_pf *pf) { } +#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ +#endif /* _ICE_PTP_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c new file mode 100644 index 000000000000..3eca0e4eab0b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -0,0 +1,651 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021, Intel Corporation. */ + +#include "ice_common.h" +#include "ice_ptp_hw.h" + +/* Low level functions for interacting with and managing the device clock used + * for the Precision Time Protocol. + * + * The ice hardware represents the current time using three registers: + * + * GLTSYN_TIME_H GLTSYN_TIME_L GLTSYN_TIME_R + * +---------------+ +---------------+ +---------------+ + * | 32 bits | | 32 bits | | 32 bits | + * +---------------+ +---------------+ +---------------+ + * + * The registers are incremented every clock tick using a 40bit increment + * value defined over two registers: + * + * GLTSYN_INCVAL_H GLTSYN_INCVAL_L + * +---------------+ +---------------+ + * | 8 bit s | | 32 bits | + * +---------------+ +---------------+ + * + * The increment value is added to the GLSTYN_TIME_R and GLSTYN_TIME_L + * registers every clock source tick. Depending on the specific device + * configuration, the clock source frequency could be one of a number of + * values. + * + * For E810 devices, the increment frequency is 812.5 MHz + * + * The hardware captures timestamps in the PHY for incoming packets, and for + * outgoing packets on request. To support this, the PHY maintains a timer + * that matches the lower 64 bits of the global source timer. + * + * In order to ensure that the PHY timers and the source timer are equivalent, + * shadow registers are used to prepare the desired initial values. A special + * sync command is issued to trigger copying from the shadow registers into + * the appropriate source and PHY registers simultaneously. + */ + +/** + * ice_get_ptp_src_clock_index - determine source clock index + * @hw: pointer to HW struct + * + * Determine the source clock index currently in use, based on device + * capabilities reported during initialization. + */ +u8 ice_get_ptp_src_clock_index(struct ice_hw *hw) +{ + return hw->func_caps.ts_func_info.tmr_index_assoc; +} + +/* E810 functions + * + * The following functions operate on the E810 series devices which use + * a separate external PHY. + */ + +/** + * ice_read_phy_reg_e810 - Read register from external PHY on E810 + * @hw: pointer to the HW struct + * @addr: the address to read from + * @val: On return, the value read from the PHY + * + * Read a register from the external PHY on the E810 device. + */ +static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val) +{ + struct ice_sbq_msg_input msg = {0}; + int status; + + msg.msg_addr_low = lower_16_bits(addr); + msg.msg_addr_high = upper_16_bits(addr); + msg.opcode = ice_sbq_msg_rd; + msg.dest_dev = rmn_0; + + status = ice_sbq_rw_reg(hw, &msg); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, status %d\n", + status); + return status; + } + + *val = msg.data; + + return 0; +} + +/** + * ice_write_phy_reg_e810 - Write register on external PHY on E810 + * @hw: pointer to the HW struct + * @addr: the address to writem to + * @val: the value to write to the PHY + * + * Write a value to a register of the external PHY on the E810 device. + */ +static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val) +{ + struct ice_sbq_msg_input msg = {0}; + int status; + + msg.msg_addr_low = lower_16_bits(addr); + msg.msg_addr_high = upper_16_bits(addr); + msg.opcode = ice_sbq_msg_wr; + msg.dest_dev = rmn_0; + msg.data = val; + + status = ice_sbq_rw_reg(hw, &msg); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, status %d\n", + status); + return status; + } + + return 0; +} + +/** + * ice_read_phy_tstamp_e810 - Read a PHY timestamp out of the external PHY + * @hw: pointer to the HW struct + * @lport: the lport to read from + * @idx: the timestamp index to read + * @tstamp: on return, the 40bit timestamp value + * + * Read a 40bit timestamp value out of the timestamp block of the external PHY + * on the E810 device. + */ +static int +ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp) +{ + u32 lo_addr, hi_addr, lo, hi; + int status; + + lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx); + hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx); + + status = ice_read_phy_reg_e810(hw, lo_addr, &lo); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, status %d\n", + status); + return status; + } + + status = ice_read_phy_reg_e810(hw, hi_addr, &hi); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, status %d\n", + status); + return status; + } + + /* For E810 devices, the timestamp is reported with the lower 32 bits + * in the low register, and the upper 8 bits in the high register. + */ + *tstamp = ((u64)hi) << TS_HIGH_S | ((u64)lo & TS_LOW_M); + + return 0; +} + +/** + * ice_clear_phy_tstamp_e810 - Clear a timestamp from the external PHY + * @hw: pointer to the HW struct + * @lport: the lport to read from + * @idx: the timestamp index to reset + * + * Clear a timestamp, resetting its valid bit, from the timestamp block of the + * external PHY on the E810 device. + */ +static int ice_clear_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx) +{ + u32 lo_addr, hi_addr; + int status; + + lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx); + hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx); + + status = ice_write_phy_reg_e810(hw, lo_addr, 0); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, status %d\n", + status); + return status; + } + + status = ice_write_phy_reg_e810(hw, hi_addr, 0); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, status %d\n", + status); + return status; + } + + return 0; +} + +/** + * ice_ptp_init_phy_e810 - Enable PTP function on the external PHY + * @hw: pointer to HW struct + * + * Enable the timesync PTP functionality for the external PHY connected to + * this function. + */ +int ice_ptp_init_phy_e810(struct ice_hw *hw) +{ + int status; + u8 tmr_idx; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx), + GLTSYN_ENA_TSYN_ENA_M); + if (status) + ice_debug(hw, ICE_DBG_PTP, "PTP failed in ena_phy_time_syn %d\n", + status); + + return status; +} + +/** + * ice_ptp_prep_phy_time_e810 - Prepare PHY port with initial time + * @hw: Board private structure + * @time: Time to initialize the PHY port clock to + * + * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the + * initial clock time. The time will not actually be programmed until the + * driver issues an INIT_TIME command. + * + * The time value is the upper 32 bits of the PHY timer, usually in units of + * nominal nanoseconds. + */ +static int ice_ptp_prep_phy_time_e810(struct ice_hw *hw, u32 time) +{ + int status; + u8 tmr_idx; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_0(tmr_idx), 0); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_0, status %d\n", + status); + return status; + } + + status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_L(tmr_idx), time); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_L, status %d\n", + status); + return status; + } + + return 0; +} + +/** + * ice_ptp_prep_phy_adj_e810 - Prep PHY port for a time adjustment + * @hw: pointer to HW struct + * @adj: adjustment value to program + * + * Prepare the PHY port for an atomic adjustment by programming the PHY + * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual adjustment + * is completed by issuing an ADJ_TIME sync command. + * + * The adjustment value only contains the portion used for the upper 32bits of + * the PHY timer, usually in units of nominal nanoseconds. Negative + * adjustments are supported using 2s complement arithmetic. + */ +static int ice_ptp_prep_phy_adj_e810(struct ice_hw *hw, s32 adj) +{ + int status; + u8 tmr_idx; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + /* Adjustments are represented as signed 2's complement values in + * nanoseconds. Sub-nanosecond adjustment is not supported. + */ + status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), 0); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_L, status %d\n", + status); + return status; + } + + status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), adj); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_H, status %d\n", + status); + return status; + } + + return 0; +} + +/** + * ice_ptp_prep_phy_incval_e810 - Prep PHY port increment value change + * @hw: pointer to HW struct + * @incval: The new 40bit increment value to prepare + * + * Prepare the PHY port for a new increment value by programming the PHY + * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual change is + * completed by issuing an INIT_INCVAL command. + */ +static int ice_ptp_prep_phy_incval_e810(struct ice_hw *hw, u64 incval) +{ + u32 high, low; + int status; + u8 tmr_idx; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + low = lower_32_bits(incval); + high = upper_32_bits(incval); + + status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), low); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write incval to PHY SHADJ_L, status %d\n", + status); + return status; + } + + status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), high); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write incval PHY SHADJ_H, status %d\n", + status); + return status; + } + + return 0; +} + +/** + * ice_ptp_port_cmd_e810 - Prepare all external PHYs for a timer command + * @hw: pointer to HW struct + * @cmd: Command to be sent to the port + * + * Prepare the external PHYs connected to this device for a timer sync + * command. + */ +static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) +{ + u32 cmd_val, val; + int status; + + switch (cmd) { + case INIT_TIME: + cmd_val = GLTSYN_CMD_INIT_TIME; + break; + case INIT_INCVAL: + cmd_val = GLTSYN_CMD_INIT_INCVAL; + break; + case ADJ_TIME: + cmd_val = GLTSYN_CMD_ADJ_TIME; + break; + case READ_TIME: + cmd_val = GLTSYN_CMD_READ_TIME; + break; + case ADJ_TIME_AT_TIME: + cmd_val = GLTSYN_CMD_ADJ_INIT_TIME; + break; + } + + /* Read, modify, write */ + status = ice_read_phy_reg_e810(hw, ETH_GLTSYN_CMD, &val); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read GLTSYN_CMD, status %d\n", status); + return status; + } + + /* Modify necessary bits only and perform write */ + val &= ~TS_CMD_MASK_E810; + val |= cmd_val; + + status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_CMD, val); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write back GLTSYN_CMD, status %d\n", status); + return status; + } + + return 0; +} + +/* Device agnostic functions + * + * The following functions implement useful behavior to hide the differences + * between E810 and other devices. They call the device-specific + * implementations where necessary. + * + * Currently, the driver only supports E810, but future work will enable + * support for E822-based devices. + */ + +/** + * ice_ptp_lock - Acquire PTP global semaphore register lock + * @hw: pointer to the HW struct + * + * Acquire the global PTP hardware semaphore lock. Returns true if the lock + * was acquired, false otherwise. + * + * The PFTSYN_SEM register sets the busy bit on read, returning the previous + * value. If software sees the busy bit cleared, this means that this function + * acquired the lock (and the busy bit is now set). If software sees the busy + * bit set, it means that another function acquired the lock. + * + * Software must clear the busy bit with a write to release the lock for other + * functions when done. + */ +bool ice_ptp_lock(struct ice_hw *hw) +{ + u32 hw_lock; + int i; + +#define MAX_TRIES 5 + + for (i = 0; i < MAX_TRIES; i++) { + hw_lock = rd32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); + hw_lock = hw_lock & PFTSYN_SEM_BUSY_M; + if (!hw_lock) + break; + + /* Somebody is holding the lock */ + usleep_range(10000, 20000); + } + + return !hw_lock; +} + +/** + * ice_ptp_unlock - Release PTP global semaphore register lock + * @hw: pointer to the HW struct + * + * Release the global PTP hardware semaphore lock. This is done by writing to + * the PFTSYN_SEM register. + */ +void ice_ptp_unlock(struct ice_hw *hw) +{ + wr32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), 0); +} + +/** + * ice_ptp_src_cmd - Prepare source timer for a timer command + * @hw: pointer to HW structure + * @cmd: Timer command + * + * Prepare the source timer for an upcoming timer sync command. + */ +static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) +{ + u32 cmd_val; + u8 tmr_idx; + + tmr_idx = ice_get_ptp_src_clock_index(hw); + cmd_val = tmr_idx << SEL_CPK_SRC; + + switch (cmd) { + case INIT_TIME: + cmd_val |= GLTSYN_CMD_INIT_TIME; + break; + case INIT_INCVAL: + cmd_val |= GLTSYN_CMD_INIT_INCVAL; + break; + case ADJ_TIME: + cmd_val |= GLTSYN_CMD_ADJ_TIME; + break; + case ADJ_TIME_AT_TIME: + cmd_val |= GLTSYN_CMD_ADJ_INIT_TIME; + break; + case READ_TIME: + cmd_val |= GLTSYN_CMD_READ_TIME; + break; + } + + wr32(hw, GLTSYN_CMD, cmd_val); +} + +/** + * ice_ptp_tmr_cmd - Prepare and trigger a timer sync command + * @hw: pointer to HW struct + * @cmd: the command to issue + * + * Prepare the source timer and PHY timers and then trigger the requested + * command. This causes the shadow registers previously written in preparation + * for the command to be synchronously applied to both the source and PHY + * timers. + */ +static int ice_ptp_tmr_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) +{ + int status; + + /* First, prepare the source timer */ + ice_ptp_src_cmd(hw, cmd); + + /* Next, prepare the ports */ + status = ice_ptp_port_cmd_e810(hw, cmd); + if (status) { + ice_debug(hw, ICE_DBG_PTP, "Failed to prepare PHY ports for timer command %u, status %d\n", + cmd, status); + return status; + } + + /* Write the sync command register to drive both source and PHY timer commands + * synchronously + */ + wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD); + + return 0; +} + +/** + * ice_ptp_init_time - Initialize device time to provided value + * @hw: pointer to HW struct + * @time: 64bits of time (GLTSYN_TIME_L and GLTSYN_TIME_H) + * + * Initialize the device to the specified time provided. This requires a three + * step process: + * + * 1) write the new init time to the source timer shadow registers + * 2) write the new init time to the PHY timer shadow registers + * 3) issue an init_time timer command to synchronously switch both the source + * and port timers to the new init time value at the next clock cycle. + */ +int ice_ptp_init_time(struct ice_hw *hw, u64 time) +{ + int status; + u8 tmr_idx; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + /* Source timers */ + wr32(hw, GLTSYN_SHTIME_L(tmr_idx), lower_32_bits(time)); + wr32(hw, GLTSYN_SHTIME_H(tmr_idx), upper_32_bits(time)); + wr32(hw, GLTSYN_SHTIME_0(tmr_idx), 0); + + /* PHY timers */ + /* Fill Rx and Tx ports and send msg to PHY */ + status = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF); + if (status) + return status; + + return ice_ptp_tmr_cmd(hw, INIT_TIME); +} + +/** + * ice_ptp_write_incval - Program PHC with new increment value + * @hw: pointer to HW struct + * @incval: Source timer increment value per clock cycle + * + * Program the PHC with a new increment value. This requires a three-step + * process: + * + * 1) Write the increment value to the source timer shadow registers + * 2) Write the increment value to the PHY timer shadow registers + * 3) Issue an INIT_INCVAL timer command to synchronously switch both the + * source and port timers to the new increment value at the next clock + * cycle. + */ +int ice_ptp_write_incval(struct ice_hw *hw, u64 incval) +{ + int status; + u8 tmr_idx; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + /* Shadow Adjust */ + wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval)); + wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval)); + + status = ice_ptp_prep_phy_incval_e810(hw, incval); + if (status) + return status; + + return ice_ptp_tmr_cmd(hw, INIT_INCVAL); +} + +/** + * ice_ptp_write_incval_locked - Program new incval while holding semaphore + * @hw: pointer to HW struct + * @incval: Source timer increment value per clock cycle + * + * Program a new PHC incval while holding the PTP semaphore. + */ +int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval) +{ + int status; + + if (!ice_ptp_lock(hw)) + return -EBUSY; + + status = ice_ptp_write_incval(hw, incval); + + ice_ptp_unlock(hw); + + return status; +} + +/** + * ice_ptp_adj_clock - Adjust PHC clock time atomically + * @hw: pointer to HW struct + * @adj: Adjustment in nanoseconds + * + * Perform an atomic adjustment of the PHC time by the specified number of + * nanoseconds. This requires a three-step process: + * + * 1) Write the adjustment to the source timer shadow registers + * 2) Write the adjustment to the PHY timer shadow registers + * 3) Issue an ADJ_TIME timer command to synchronously apply the adjustment to + * both the source and port timers at the next clock cycle. + */ +int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) +{ + int status; + u8 tmr_idx; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + /* Write the desired clock adjustment into the GLTSYN_SHADJ register. + * For an ADJ_TIME command, this set of registers represents the value + * to add to the clock time. It supports subtraction by interpreting + * the value as a 2's complement integer. + */ + wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0); + wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj); + + status = ice_ptp_prep_phy_adj_e810(hw, adj); + if (status) + return status; + + return ice_ptp_tmr_cmd(hw, ADJ_TIME); +} + +/** + * ice_read_phy_tstamp - Read a PHY timestamp from the timestamo block + * @hw: pointer to the HW struct + * @block: the block to read from + * @idx: the timestamp index to read + * @tstamp: on return, the 40bit timestamp value + * + * Read a 40bit timestamp value out of the timestamp block. + */ +int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) +{ + return ice_read_phy_tstamp_e810(hw, block, idx, tstamp); +} + +/** + * ice_clear_phy_tstamp - Clear a timestamp from the timestamp block + * @hw: pointer to the HW struct + * @block: the block to read from + * @idx: the timestamp index to reset + * + * Clear a timestamp, resetting its valid bit, from the timestamp block. + */ +int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) +{ + return ice_clear_phy_tstamp_e810(hw, block, idx); +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h new file mode 100644 index 000000000000..55a414e87018 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021, Intel Corporation. */ + +#ifndef _ICE_PTP_HW_H_ +#define _ICE_PTP_HW_H_ + +enum ice_ptp_tmr_cmd { + INIT_TIME, + INIT_INCVAL, + ADJ_TIME, + ADJ_TIME_AT_TIME, + READ_TIME +}; + +/* Increment value to generate nanoseconds in the GLTSYN_TIME_L register for + * the E810 devices. Based off of a PLL with an 812.5 MHz frequency. + */ +#define ICE_PTP_NOMINAL_INCVAL_E810 0x13b13b13bULL + +/* Device agnostic functions */ +u8 ice_get_ptp_src_clock_index(struct ice_hw *hw); +bool ice_ptp_lock(struct ice_hw *hw); +void ice_ptp_unlock(struct ice_hw *hw); +int ice_ptp_init_time(struct ice_hw *hw, u64 time); +int ice_ptp_write_incval(struct ice_hw *hw, u64 incval); +int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval); +int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj); +int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp); +int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx); + +/* E810 family functions */ +int ice_ptp_init_phy_e810(struct ice_hw *hw); + +#define PFTSYN_SEM_BYTES 4 + +/* PHY timer commands */ +#define SEL_CPK_SRC 8 + +/* Time Sync command Definitions */ +#define GLTSYN_CMD_INIT_TIME BIT(0) +#define GLTSYN_CMD_INIT_INCVAL BIT(1) +#define GLTSYN_CMD_ADJ_TIME BIT(2) +#define GLTSYN_CMD_ADJ_INIT_TIME (BIT(2) | BIT(3)) +#define GLTSYN_CMD_READ_TIME BIT(7) + +#define TS_CMD_MASK_E810 0xFF +#define SYNC_EXEC_CMD 0x3 + +/* E810 timesync enable register */ +#define ETH_GLTSYN_ENA(_i) (0x03000348 + ((_i) * 4)) + +/* E810 shadow init time registers */ +#define ETH_GLTSYN_SHTIME_0(i) (0x03000368 + ((i) * 32)) +#define ETH_GLTSYN_SHTIME_L(i) (0x0300036C + ((i) * 32)) + +/* E810 shadow time adjust registers */ +#define ETH_GLTSYN_SHADJ_L(_i) (0x03000378 + ((_i) * 32)) +#define ETH_GLTSYN_SHADJ_H(_i) (0x0300037C + ((_i) * 32)) + +/* E810 timer command register */ +#define ETH_GLTSYN_CMD 0x03000344 + +/* Source timer incval macros */ +#define INCVAL_HIGH_M 0xFF + +/* Timestamp block macros */ +#define TS_LOW_M 0xFFFFFFFF +#define TS_HIGH_S 32 + +#define BYTES_PER_IDX_ADDR_L_U 8 + +/* External PHY timestamp address */ +#define TS_EXT(a, port, idx) ((a) + (0x1000 * (port)) + \ + ((idx) * BYTES_PER_IDX_ADDR_L_U)) + +#define LOW_TX_MEMORY_BANK_START 0x03090000 +#define HIGH_TX_MEMORY_BANK_START 0x03090004 + +#endif /* _ICE_PTP_HW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h new file mode 100644 index 000000000000..ead75fe2bcda --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021, Intel Corporation. */ + +#ifndef _ICE_SBQ_CMD_H_ +#define _ICE_SBQ_CMD_H_ + +/* This header file defines the Sideband Queue commands, error codes and + * descriptor format. It is shared between Firmware and Software. + */ + +/* Sideband Queue command structure and opcodes */ +enum ice_sbq_opc { + /* Sideband Queue commands */ + ice_sbq_opc_neigh_dev_req = 0x0C00, + ice_sbq_opc_neigh_dev_ev = 0x0C01 +}; + +/* Sideband Queue descriptor. Indirect command + * and non posted + */ +struct ice_sbq_cmd_desc { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 cmd_retval; + + /* Opaque message data */ + __le32 cookie_high; + __le32 cookie_low; + + union { + __le16 cmd_len; + __le16 cmpl_len; + } param0; + + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_sbq_evt_desc { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 cmd_retval; + u8 data[24]; +}; + +enum ice_sbq_msg_dev { + rmn_0 = 0x02, + rmn_1 = 0x03, + rmn_2 = 0x04, + cgu = 0x06 +}; + +enum ice_sbq_msg_opcode { + ice_sbq_msg_rd = 0x00, + ice_sbq_msg_wr = 0x01 +}; + +#define ICE_SBQ_MSG_FLAGS 0x40 +#define ICE_SBQ_MSG_SBE_FBE 0x0F + +struct ice_sbq_msg_req { + u8 dest_dev; + u8 src_dev; + u8 opcode; + u8 flags; + u8 sbe_fbe; + u8 func_id; + __le16 msg_addr_low; + __le32 msg_addr_high; + __le32 data; +}; + +struct ice_sbq_msg_cmpl { + u8 dest_dev; + u8 src_dev; + u8 opcode; + u8 flags; + __le32 data; +}; + +/* Internal struct */ +struct ice_sbq_msg_input { + u8 dest_dev; + u8 opcode; + u16 msg_addr_low; + u32 msg_addr_high; + u32 data; +}; +#endif /* _ICE_SBQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 2f097637e405..9f07b6641705 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -596,6 +596,50 @@ ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs) } /** + * ice_alloc_rdma_q_ctx - allocate RDMA queue contexts for the given VSI and TC + * @hw: pointer to the HW struct + * @vsi_handle: VSI handle + * @tc: TC number + * @new_numqs: number of queues + */ +static enum ice_status +ice_alloc_rdma_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs) +{ + struct ice_vsi_ctx *vsi_ctx; + struct ice_q_ctx *q_ctx; + + vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle); + if (!vsi_ctx) + return ICE_ERR_PARAM; + /* allocate RDMA queue contexts */ + if (!vsi_ctx->rdma_q_ctx[tc]) { + vsi_ctx->rdma_q_ctx[tc] = devm_kcalloc(ice_hw_to_dev(hw), + new_numqs, + sizeof(*q_ctx), + GFP_KERNEL); + if (!vsi_ctx->rdma_q_ctx[tc]) + return ICE_ERR_NO_MEMORY; + vsi_ctx->num_rdma_q_entries[tc] = new_numqs; + return 0; + } + /* num queues are increased, update the queue contexts */ + if (new_numqs > vsi_ctx->num_rdma_q_entries[tc]) { + u16 prev_num = vsi_ctx->num_rdma_q_entries[tc]; + + q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs, + sizeof(*q_ctx), GFP_KERNEL); + if (!q_ctx) + return ICE_ERR_NO_MEMORY; + memcpy(q_ctx, vsi_ctx->rdma_q_ctx[tc], + prev_num * sizeof(*q_ctx)); + devm_kfree(ice_hw_to_dev(hw), vsi_ctx->rdma_q_ctx[tc]); + vsi_ctx->rdma_q_ctx[tc] = q_ctx; + vsi_ctx->num_rdma_q_entries[tc] = new_numqs; + } + return 0; +} + +/** * ice_aq_rl_profile - performs a rate limiting task * @hw: pointer to the HW struct * @opcode: opcode for add, query, or remove profile(s) @@ -1774,13 +1818,22 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, if (!vsi_ctx) return ICE_ERR_PARAM; - prev_numqs = vsi_ctx->sched.max_lanq[tc]; + if (owner == ICE_SCHED_NODE_OWNER_LAN) + prev_numqs = vsi_ctx->sched.max_lanq[tc]; + else + prev_numqs = vsi_ctx->sched.max_rdmaq[tc]; /* num queues are not changed or less than the previous number */ if (new_numqs <= prev_numqs) return status; - status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs); - if (status) - return status; + if (owner == ICE_SCHED_NODE_OWNER_LAN) { + status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs); + if (status) + return status; + } else { + status = ice_alloc_rdma_q_ctx(hw, vsi_handle, tc, new_numqs); + if (status) + return status; + } if (new_numqs) ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes); @@ -1795,7 +1848,10 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, new_num_nodes, owner); if (status) return status; - vsi_ctx->sched.max_lanq[tc] = new_numqs; + if (owner == ICE_SCHED_NODE_OWNER_LAN) + vsi_ctx->sched.max_lanq[tc] = new_numqs; + else + vsi_ctx->sched.max_rdmaq[tc] = new_numqs; return 0; } @@ -1861,6 +1917,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs, * recreate the child nodes all the time in these cases. */ vsi_ctx->sched.max_lanq[tc] = 0; + vsi_ctx->sched.max_rdmaq[tc] = 0; } /* update the VSI child nodes */ @@ -1990,6 +2047,8 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) } if (owner == ICE_SCHED_NODE_OWNER_LAN) vsi_ctx->sched.max_lanq[i] = 0; + else + vsi_ctx->sched.max_rdmaq[i] = 0; } status = 0; @@ -2686,8 +2745,8 @@ static enum ice_status ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle, unsigned long *tc_bitmap) { - struct ice_sched_agg_vsi_info *agg_vsi_info; - struct ice_sched_agg_info *agg_info; + struct ice_sched_agg_vsi_info *agg_vsi_info, *old_agg_vsi_info = NULL; + struct ice_sched_agg_info *agg_info, *old_agg_info; enum ice_status status = 0; struct ice_hw *hw = pi->hw; u8 tc; @@ -2697,6 +2756,20 @@ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, agg_info = ice_get_agg_info(hw, agg_id); if (!agg_info) return ICE_ERR_PARAM; + /* If the VSI is already part of another aggregator then update + * its VSI info list + */ + old_agg_info = ice_get_vsi_agg_info(hw, vsi_handle); + if (old_agg_info && old_agg_info != agg_info) { + struct ice_sched_agg_vsi_info *vtmp; + + list_for_each_entry_safe(old_agg_vsi_info, vtmp, + &old_agg_info->agg_vsi_list, + list_entry) + if (old_agg_vsi_info->vsi_handle == vsi_handle) + break; + } + /* check if entry already exist */ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle); if (!agg_vsi_info) { @@ -2721,6 +2794,12 @@ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, break; set_bit(tc, agg_vsi_info->tc_bitmap); + if (old_agg_vsi_info) + clear_bit(tc, old_agg_vsi_info->tc_bitmap); + } + if (old_agg_vsi_info && !old_agg_vsi_info->tc_bitmap[0]) { + list_del(&old_agg_vsi_info->list_entry); + devm_kfree(ice_hw_to_dev(pi->hw), old_agg_vsi_info); } return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 357d3073d814..3b6c1420aa7b 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018, Intel Corporation. */ +#include "ice_lib.h" #include "ice_switch.h" #define ICE_ETH_DA_OFFSET 0 @@ -302,6 +303,10 @@ static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle) devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]); vsi->lan_q_ctx[i] = NULL; } + if (vsi->rdma_q_ctx[i]) { + devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]); + vsi->rdma_q_ctx[i] = NULL; + } } } @@ -423,6 +428,29 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, } /** + * ice_cfg_rdma_fltr - enable/disable RDMA filtering on VSI + * @hw: pointer to HW struct + * @vsi_handle: VSI SW index + * @enable: boolean for enable/disable + */ +int +ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable) +{ + struct ice_vsi_ctx *ctx; + + ctx = ice_get_vsi_ctx(hw, vsi_handle); + if (!ctx) + return -EIO; + + if (enable) + ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; + else + ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; + + return ice_status_to_errno(ice_update_vsi(hw, vsi_handle, ctx, NULL)); +} + +/** * ice_aq_alloc_free_vsi_list * @hw: pointer to the HW struct * @vsi_list_id: VSI list ID returned or used for lookup diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 8b4f9d35c860..c5db8d56133f 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -26,6 +26,8 @@ struct ice_vsi_ctx { u8 vf_num; u16 num_lan_q_entries[ICE_MAX_TRAFFIC_CLASS]; struct ice_q_ctx *lan_q_ctx[ICE_MAX_TRAFFIC_CLASS]; + u16 num_rdma_q_entries[ICE_MAX_TRAFFIC_CLASS]; + struct ice_q_ctx *rdma_q_ctx[ICE_MAX_TRAFFIC_CLASS]; }; enum ice_sw_fwd_act_type { @@ -223,6 +225,8 @@ enum ice_status ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list); enum ice_status ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list); +int +ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable); void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle); enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list); @@ -243,7 +247,6 @@ ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, enum ice_status ice_init_def_sw_recp(struct ice_hw *hw); u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle); -bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle); enum ice_status ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle); void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h new file mode 100644 index 000000000000..9bc0b8fdfc77 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_trace.h @@ -0,0 +1,232 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021 Intel Corporation. */ + +/* Modeled on trace-events-sample.h */ + +/* The trace subsystem name for ice will be "ice". + * + * This file is named ice_trace.h. + * + * Since this include file's name is different from the trace + * subsystem name, we'll have to define TRACE_INCLUDE_FILE at the end + * of this file. + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ice + +/* See trace-events-sample.h for a detailed description of why this + * guard clause is different from most normal include files. + */ +#if !defined(_ICE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _ICE_TRACE_H_ + +#include <linux/tracepoint.h> + +/* ice_trace() macro enables shared code to refer to trace points + * like: + * + * trace_ice_example(args...) + * + * ... as: + * + * ice_trace(example, args...) + * + * ... to resolve to the PF version of the tracepoint without + * ifdefs, and to allow tracepoints to be disabled entirely at build + * time. + * + * Trace point should always be referred to in the driver via this + * macro. + * + * Similarly, ice_trace_enabled(trace_name) wraps references to + * trace_ice_<trace_name>_enabled() functions. + * @trace_name: name of tracepoint + */ +#define _ICE_TRACE_NAME(trace_name) (trace_##ice##_##trace_name) +#define ICE_TRACE_NAME(trace_name) _ICE_TRACE_NAME(trace_name) + +#define ice_trace(trace_name, args...) ICE_TRACE_NAME(trace_name)(args) + +#define ice_trace_enabled(trace_name) ICE_TRACE_NAME(trace_name##_enabled)() + +/* This is for events common to PF. Corresponding versions will be named + * trace_ice_*. The ice_trace() macro above will select the right trace point + * name for the driver. + */ + +/* Begin tracepoints */ + +/* Global tracepoints */ + +/* Events related to DIM, q_vectors and ring containers */ +DECLARE_EVENT_CLASS(ice_rx_dim_template, + TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim), + TP_ARGS(q_vector, dim), + TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector) + __field(struct dim *, dim) + __string(devname, q_vector->rx.ring->netdev->name)), + + TP_fast_assign(__entry->q_vector = q_vector; + __entry->dim = dim; + __assign_str(devname, q_vector->rx.ring->netdev->name);), + + TP_printk("netdev: %s Rx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d", + __get_str(devname), + __entry->q_vector->rx.ring->q_index, + __entry->dim->state, + __entry->dim->profile_ix, + __entry->dim->tune_state, + __entry->dim->steps_right, + __entry->dim->steps_left, + __entry->dim->tired) +); + +DEFINE_EVENT(ice_rx_dim_template, ice_rx_dim_work, + TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim), + TP_ARGS(q_vector, dim) +); + +DECLARE_EVENT_CLASS(ice_tx_dim_template, + TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim), + TP_ARGS(q_vector, dim), + TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector) + __field(struct dim *, dim) + __string(devname, q_vector->tx.ring->netdev->name)), + + TP_fast_assign(__entry->q_vector = q_vector; + __entry->dim = dim; + __assign_str(devname, q_vector->tx.ring->netdev->name);), + + TP_printk("netdev: %s Tx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d", + __get_str(devname), + __entry->q_vector->tx.ring->q_index, + __entry->dim->state, + __entry->dim->profile_ix, + __entry->dim->tune_state, + __entry->dim->steps_right, + __entry->dim->steps_left, + __entry->dim->tired) +); + +DEFINE_EVENT(ice_tx_dim_template, ice_tx_dim_work, + TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim), + TP_ARGS(q_vector, dim) +); + +/* Events related to a vsi & ring */ +DECLARE_EVENT_CLASS(ice_tx_template, + TP_PROTO(struct ice_ring *ring, struct ice_tx_desc *desc, + struct ice_tx_buf *buf), + + TP_ARGS(ring, desc, buf), + TP_STRUCT__entry(__field(void *, ring) + __field(void *, desc) + __field(void *, buf) + __string(devname, ring->netdev->name)), + + TP_fast_assign(__entry->ring = ring; + __entry->desc = desc; + __entry->buf = buf; + __assign_str(devname, ring->netdev->name);), + + TP_printk("netdev: %s ring: %pK desc: %pK buf %pK", __get_str(devname), + __entry->ring, __entry->desc, __entry->buf) +); + +#define DEFINE_TX_TEMPLATE_OP_EVENT(name) \ +DEFINE_EVENT(ice_tx_template, name, \ + TP_PROTO(struct ice_ring *ring, \ + struct ice_tx_desc *desc, \ + struct ice_tx_buf *buf), \ + TP_ARGS(ring, desc, buf)) + +DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq); +DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap); +DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap_eop); + +DECLARE_EVENT_CLASS(ice_rx_template, + TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc), + + TP_ARGS(ring, desc), + + TP_STRUCT__entry(__field(void *, ring) + __field(void *, desc) + __string(devname, ring->netdev->name)), + + TP_fast_assign(__entry->ring = ring; + __entry->desc = desc; + __assign_str(devname, ring->netdev->name);), + + TP_printk("netdev: %s ring: %pK desc: %pK", __get_str(devname), + __entry->ring, __entry->desc) +); +DEFINE_EVENT(ice_rx_template, ice_clean_rx_irq, + TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc), + TP_ARGS(ring, desc) +); + +DECLARE_EVENT_CLASS(ice_rx_indicate_template, + TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc, + struct sk_buff *skb), + + TP_ARGS(ring, desc, skb), + + TP_STRUCT__entry(__field(void *, ring) + __field(void *, desc) + __field(void *, skb) + __string(devname, ring->netdev->name)), + + TP_fast_assign(__entry->ring = ring; + __entry->desc = desc; + __entry->skb = skb; + __assign_str(devname, ring->netdev->name);), + + TP_printk("netdev: %s ring: %pK desc: %pK skb %pK", __get_str(devname), + __entry->ring, __entry->desc, __entry->skb) +); + +DEFINE_EVENT(ice_rx_indicate_template, ice_clean_rx_irq_indicate, + TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc, + struct sk_buff *skb), + TP_ARGS(ring, desc, skb) +); + +DECLARE_EVENT_CLASS(ice_xmit_template, + TP_PROTO(struct ice_ring *ring, struct sk_buff *skb), + + TP_ARGS(ring, skb), + + TP_STRUCT__entry(__field(void *, ring) + __field(void *, skb) + __string(devname, ring->netdev->name)), + + TP_fast_assign(__entry->ring = ring; + __entry->skb = skb; + __assign_str(devname, ring->netdev->name);), + + TP_printk("netdev: %s skb: %pK ring: %pK", __get_str(devname), + __entry->skb, __entry->ring) +); + +#define DEFINE_XMIT_TEMPLATE_OP_EVENT(name) \ +DEFINE_EVENT(ice_xmit_template, name, \ + TP_PROTO(struct ice_ring *ring, struct sk_buff *skb), \ + TP_ARGS(ring, skb)) + +DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring); +DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring_drop); + +/* End tracepoints */ + +#endif /* _ICE_TRACE_H_ */ +/* This must be outside ifdef _ICE_TRACE_H */ + +/* This trace include file is not located in the .../include/trace + * with the kernel tracepoint definitions, because we're a loadable + * module. + */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE ../../drivers/net/ethernet/intel/ice/ice_trace +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 04748aa4c7c8..6ee8e0032d52 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -10,6 +10,7 @@ #include "ice_txrx_lib.h" #include "ice_lib.h" #include "ice.h" +#include "ice_trace.h" #include "ice_dcb_lib.h" #include "ice_xsk.h" @@ -224,6 +225,7 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) smp_rmb(); /* prevent any other reads prior to eop_desc */ + ice_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf); /* if the descriptor isn't done, no work yet to do */ if (!(eop_desc->cmd_type_offset_bsz & cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) @@ -254,6 +256,7 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) /* unmap remaining buffers */ while (tx_desc != eop_desc) { + ice_trace(clean_tx_irq_unmap, tx_ring, tx_desc, tx_buf); tx_buf++; tx_desc++; i++; @@ -272,6 +275,7 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) dma_unmap_len_set(tx_buf, len, 0); } } + ice_trace(clean_tx_irq_unmap_eop, tx_ring, tx_desc, tx_buf); /* move us one more past the eop_desc for start of next pkt */ tx_buf++; @@ -1082,7 +1086,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) u16 stat_err_bits; int rx_buf_pgcnt; u16 vlan_tag = 0; - u8 rx_ptype; + u16 rx_ptype; /* get the Rx desc from Rx ring based on 'next_to_clean' */ rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); @@ -1102,6 +1106,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) */ dma_rmb(); + ice_trace(clean_rx_irq, rx_ring, rx_desc); if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) { struct ice_vsi *ctrl_vsi = rx_ring->vsi; @@ -1135,15 +1140,11 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); #endif - rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_prog); - if (!xdp_prog) { - rcu_read_unlock(); + if (!xdp_prog) goto construct_skb; - } xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog); - rcu_read_unlock(); if (!xdp_res) goto construct_skb; if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { @@ -1207,6 +1208,7 @@ construct_skb: ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb); /* send completed skb up the stack */ ice_receive_skb(rx_ring, skb, vlan_tag); skb = NULL; @@ -2137,6 +2139,41 @@ static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count) } /** + * ice_tstamp - set up context descriptor for hardware timestamp + * @tx_ring: pointer to the Tx ring to send buffer on + * @skb: pointer to the SKB we're sending + * @first: Tx buffer + * @off: Tx offload parameters + */ +static void +ice_tstamp(struct ice_ring *tx_ring, struct sk_buff *skb, + struct ice_tx_buf *first, struct ice_tx_offload_params *off) +{ + s8 idx; + + /* only timestamp the outbound packet if the user has requested it */ + if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) + return; + + if (!tx_ring->ptp_tx) + return; + + /* Tx timestamps cannot be sampled when doing TSO */ + if (first->tx_flags & ICE_TX_FLAGS_TSO) + return; + + /* Grab an open timestamp slot */ + idx = ice_ptp_request_ts(tx_ring->tx_tstamps, skb); + if (idx < 0) + return; + + off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | + (ICE_TX_CTX_DESC_TSYN << ICE_TXD_CTX_QW1_CMD_S) | + ((u64)idx << ICE_TXD_CTX_QW1_TSO_LEN_S)); + first->tx_flags |= ICE_TX_FLAGS_TSYN; +} + +/** * ice_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer * @tx_ring: ring to send buffer on @@ -2153,6 +2190,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) unsigned int count; int tso, csum; + ice_trace(xmit_frame_ring, tx_ring, skb); + count = ice_xmit_desc_count(skb); if (ice_chk_linearize(skb, count)) { if (__skb_linearize(skb)) @@ -2205,6 +2244,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S); + ice_tstamp(tx_ring, skb, first, &offload); + if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { struct ice_tx_ctx_desc *cdesc; u16 i = tx_ring->next_to_use; @@ -2225,6 +2266,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) return NETDEV_TX_OK; out_drop: + ice_trace(xmit_frame_ring_drop, tx_ring, skb); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index c5a92ac787d6..1e46e80f3d6f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -118,6 +118,7 @@ static inline int ice_skb_pad(void) * freed instead of returned like skb packets. */ #define ICE_TX_FLAGS_DUMMY_PKT BIT(3) +#define ICE_TX_FLAGS_TSYN BIT(4) #define ICE_TX_FLAGS_IPV4 BIT(5) #define ICE_TX_FLAGS_IPV6 BIT(6) #define ICE_TX_FLAGS_TUNNEL BIT(7) @@ -311,6 +312,10 @@ struct ice_ring { u32 txq_teid; /* Added Tx queue TEID */ u16 rx_buf_len; u8 dcb_tc; /* Traffic class of ring */ + struct ice_ptp_tx *tx_tstamps; + u64 cached_phctime; + u8 ptp_rx:1; + u8 ptp_tx:1; } ____cacheline_internodealigned_in_smp; static inline bool ice_ring_uses_build_skb(struct ice_ring *ring) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 207f6ee3a7f6..171397dcf00a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -38,10 +38,23 @@ void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val) * ice_ptype_to_htype - get a hash type * @ptype: the ptype value from the descriptor * - * Returns a hash type to be used by skb_set_hash + * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by + * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of + * Rx desc. */ -static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype) +static enum pkt_hash_types ice_ptype_to_htype(u16 ptype) { + struct ice_rx_ptype_decoded decoded = ice_decode_rx_desc_ptype(ptype); + + if (!decoded.known) + return PKT_HASH_TYPE_NONE; + if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4) + return PKT_HASH_TYPE_L4; + if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3) + return PKT_HASH_TYPE_L3; + if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2) + return PKT_HASH_TYPE_L2; + return PKT_HASH_TYPE_NONE; } @@ -54,7 +67,7 @@ static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype) */ static void ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u8 rx_ptype) + struct sk_buff *skb, u16 rx_ptype) { struct ice_32b_rx_flex_desc_nic *nic_mdid; u32 hash; @@ -81,7 +94,7 @@ ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, */ static void ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, - union ice_32b_rx_flex_desc *rx_desc, u8 ptype) + union ice_32b_rx_flex_desc *rx_desc, u16 ptype) { struct ice_rx_ptype_decoded decoded; u16 rx_status0, rx_status1; @@ -167,7 +180,7 @@ checksum_fail: void ice_process_skb_fields(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u8 ptype) + struct sk_buff *skb, u16 ptype) { ice_rx_hash(rx_ring, rx_desc, skb, ptype); @@ -175,6 +188,9 @@ ice_process_skb_fields(struct ice_ring *rx_ring, skb->protocol = eth_type_trans(skb, rx_ring->netdev); ice_rx_csum(rx_ring, skb, rx_desc, ptype); + + if (rx_ring->ptp_rx) + ice_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 58ff58f0f972..05ac30752902 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -53,7 +53,7 @@ void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val); void ice_process_skb_fields(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u8 ptype); + struct sk_buff *skb, u16 ptype); void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); #endif /* !_ICE_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 4474dd6a7ba1..d33d1906103c 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -14,6 +14,7 @@ #include "ice_lan_tx_rx.h" #include "ice_flex_type.h" #include "ice_protocol_type.h" +#include "ice_sbq_cmd.h" static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc) { @@ -45,8 +46,10 @@ static inline u32 ice_round_to_num(u32 N, u32 R) #define ICE_DBG_FLOW BIT_ULL(9) #define ICE_DBG_SW BIT_ULL(13) #define ICE_DBG_SCHED BIT_ULL(14) +#define ICE_DBG_RDMA BIT_ULL(15) #define ICE_DBG_PKG BIT_ULL(16) #define ICE_DBG_RES BIT_ULL(17) +#define ICE_DBG_PTP BIT_ULL(19) #define ICE_DBG_AQ_MSG BIT_ULL(24) #define ICE_DBG_AQ_DESC BIT_ULL(25) #define ICE_DBG_AQ_DESC_BUF BIT_ULL(26) @@ -63,7 +66,7 @@ enum ice_aq_res_ids { /* FW update timeout definitions are in milliseconds */ #define ICE_NVM_TIMEOUT 180000 #define ICE_CHANGE_LOCK_TIMEOUT 1000 -#define ICE_GLOBAL_CFG_LOCK_TIMEOUT 3000 +#define ICE_GLOBAL_CFG_LOCK_TIMEOUT 5000 enum ice_aq_res_access_type { ICE_RES_READ = 1, @@ -146,6 +149,7 @@ struct ice_link_status { u16 max_frame_size; u16 link_speed; u16 req_speeds; + u8 link_cfg_err; u8 lse_ena; /* Link Status Event notification */ u8 link_info; u8 an_info; @@ -262,6 +266,8 @@ struct ice_hw_common_caps { u8 rss_table_entry_width; /* RSS Entry width in bits */ u8 dcb; + u8 ieee_1588; + u8 rdma; bool nvm_update_pending_nvm; bool nvm_update_pending_orom; @@ -273,6 +279,54 @@ struct ice_hw_common_caps { #define ICE_NVM_MGMT_UNIFIED_UPD_SUPPORT BIT(3) }; +/* IEEE 1588 TIME_SYNC specific info */ +/* Function specific definitions */ +#define ICE_TS_FUNC_ENA_M BIT(0) +#define ICE_TS_SRC_TMR_OWND_M BIT(1) +#define ICE_TS_TMR_ENA_M BIT(2) +#define ICE_TS_TMR_IDX_OWND_S 4 +#define ICE_TS_TMR_IDX_OWND_M BIT(4) +#define ICE_TS_CLK_FREQ_S 16 +#define ICE_TS_CLK_FREQ_M ICE_M(0x7, ICE_TS_CLK_FREQ_S) +#define ICE_TS_CLK_SRC_S 20 +#define ICE_TS_CLK_SRC_M BIT(20) +#define ICE_TS_TMR_IDX_ASSOC_S 24 +#define ICE_TS_TMR_IDX_ASSOC_M BIT(24) + +struct ice_ts_func_info { + /* Function specific info */ + u32 clk_freq; + u8 clk_src; + u8 tmr_index_assoc; + u8 ena; + u8 tmr_index_owned; + u8 src_tmr_owned; + u8 tmr_ena; +}; + +/* Device specific definitions */ +#define ICE_TS_TMR0_OWNR_M 0x7 +#define ICE_TS_TMR0_OWND_M BIT(3) +#define ICE_TS_TMR1_OWNR_S 4 +#define ICE_TS_TMR1_OWNR_M ICE_M(0x7, ICE_TS_TMR1_OWNR_S) +#define ICE_TS_TMR1_OWND_M BIT(7) +#define ICE_TS_DEV_ENA_M BIT(24) +#define ICE_TS_TMR0_ENA_M BIT(25) +#define ICE_TS_TMR1_ENA_M BIT(26) + +struct ice_ts_dev_info { + /* Device specific info */ + u32 ena_ports; + u32 tmr_own_map; + u32 tmr0_owner; + u32 tmr1_owner; + u8 tmr0_owned; + u8 tmr1_owned; + u8 ena; + u8 tmr0_ena; + u8 tmr1_ena; +}; + /* Function specific capabilities */ struct ice_hw_func_caps { struct ice_hw_common_caps common_cap; @@ -281,6 +335,7 @@ struct ice_hw_func_caps { u32 guar_num_vsi; u32 fd_fltr_guar; /* Number of filters guaranteed */ u32 fd_fltr_best_effort; /* Number of best effort filters */ + struct ice_ts_func_info ts_func_info; }; /* Device wide capabilities */ @@ -289,6 +344,7 @@ struct ice_hw_dev_caps { u32 num_vfs_exposed; /* Total number of VFs exposed */ u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */ u32 num_flow_director_fltr; /* Number of FD filters available */ + struct ice_ts_dev_info ts_dev_info; u32 num_funcs; }; @@ -440,6 +496,7 @@ struct ice_sched_node { u8 tc_num; u8 owner; #define ICE_SCHED_NODE_OWNER_LAN 0 +#define ICE_SCHED_NODE_OWNER_RDMA 2 }; /* Access Macros for Tx Sched Elements data */ @@ -511,6 +568,7 @@ struct ice_sched_vsi_info { struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS]; struct list_head list_entry; u16 max_lanq[ICE_MAX_TRAFFIC_CLASS]; + u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS]; }; /* driver defines the policy */ @@ -749,6 +807,7 @@ struct ice_hw { /* Control Queue info */ struct ice_ctl_q_info adminq; + struct ice_ctl_q_info sbq; struct ice_ctl_q_info mailboxq; u8 api_branch; /* API branch version */ @@ -784,6 +843,14 @@ struct ice_hw { u8 ucast_shared; /* true if VSIs can share unicast addr */ +#define ICE_PHY_PER_NAC 1 +#define ICE_MAX_QUAD 2 +#define ICE_NUM_QUAD_TYPE 2 +#define ICE_PORTS_PER_QUAD 4 +#define ICE_PHY_0_LAST_QUAD 1 +#define ICE_PORTS_PER_PHY 8 +#define ICE_NUM_EXTERNAL_PORTS ICE_PORTS_PER_PHY + /* Active package version (currently active) */ struct ice_pkg_ver active_pkg_ver; u32 active_track_id; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 97a46c616aca..2826570dab51 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -939,16 +939,18 @@ static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf) vf->num_mac++; - if (is_valid_ether_addr(vf->dflt_lan_addr.addr)) { - status = ice_fltr_add_mac(vsi, vf->dflt_lan_addr.addr, + if (is_valid_ether_addr(vf->hw_lan_addr.addr)) { + status = ice_fltr_add_mac(vsi, vf->hw_lan_addr.addr, ICE_FWD_TO_VSI); if (status) { dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %s\n", - &vf->dflt_lan_addr.addr[0], vf->vf_id, + &vf->hw_lan_addr.addr[0], vf->vf_id, ice_stat_str(status)); return ice_status_to_errno(status); } vf->num_mac++; + + ether_addr_copy(vf->dev_lan_addr.addr, vf->hw_lan_addr.addr); } return 0; @@ -1687,7 +1689,6 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) else promisc_m = ICE_UCAST_PROMISC_BITS; - vsi = ice_get_vf_vsi(vf); if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true)) dev_err(dev, "disabling promiscuous mode failed\n"); } @@ -2386,7 +2387,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV; vfres->vsi_res[0].num_queue_pairs = vsi->num_txq; ether_addr_copy(vfres->vsi_res[0].default_mac_addr, - vf->dflt_lan_addr.addr); + vf->hw_lan_addr.addr); /* match guest capabilities */ vf->driver_caps = vfres->vf_cap_flags; @@ -3542,10 +3543,9 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) struct virtchnl_vsi_queue_config_info *qci = (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; - u16 num_rxq = 0, num_txq = 0; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - int i; + int i, q_idx; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -3583,18 +3583,31 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } + + q_idx = qpi->rxq.queue_id; + + /* make sure selected "q_idx" is in valid range of queues + * for selected "vsi" + */ + if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + /* copy Tx queue info from VF into VSI */ if (qpi->txq.ring_len > 0) { - num_txq++; vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr; vsi->tx_rings[i]->count = qpi->txq.ring_len; + if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } } /* copy Rx queue info from VF into VSI */ if (qpi->rxq.ring_len > 0) { u16 max_frame_size = ice_vc_get_max_frame_size(vf); - num_rxq++; vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; vsi->rx_rings[i]->count = qpi->rxq.ring_len; @@ -3611,27 +3624,20 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - } - vsi->max_frame = qpi->rxq.max_pkt_size; - /* add space for the port VLAN since the VF driver is not - * expected to account for it in the MTU calculation - */ - if (vf->port_vlan_info) - vsi->max_frame += VLAN_HLEN; - } - - /* VF can request to configure less than allocated queues or default - * allocated queues. So update the VSI with new number - */ - vsi->num_txq = num_txq; - vsi->num_rxq = num_rxq; - /* All queues of VF VSI are in TC 0 */ - vsi->tc_cfg.tc_info[0].qcount_tx = num_txq; - vsi->tc_cfg.tc_info[0].qcount_rx = num_rxq; + vsi->max_frame = qpi->rxq.max_pkt_size; + /* add space for the port VLAN since the VF driver is not + * expected to account for it in the MTU calculation + */ + if (vf->port_vlan_info) + vsi->max_frame += VLAN_HLEN; - if (ice_vsi_cfg_lan_txqs(vsi) || ice_vsi_cfg_rxqs(vsi)) - v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; + if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + } + } error_param: /* send the response to the VF */ @@ -3667,19 +3673,95 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf) } /** + * ice_vc_ether_addr_type - get type of virtchnl_ether_addr + * @vc_ether_addr: used to extract the type + */ +static u8 +ice_vc_ether_addr_type(struct virtchnl_ether_addr *vc_ether_addr) +{ + return (vc_ether_addr->type & VIRTCHNL_ETHER_ADDR_TYPE_MASK); +} + +/** + * ice_is_vc_addr_legacy - check if the MAC address is from an older VF + * @vc_ether_addr: VIRTCHNL structure that contains MAC and type + */ +static bool +ice_is_vc_addr_legacy(struct virtchnl_ether_addr *vc_ether_addr) +{ + u8 type = ice_vc_ether_addr_type(vc_ether_addr); + + return (type == VIRTCHNL_ETHER_ADDR_LEGACY); +} + +/** + * ice_is_vc_addr_primary - check if the MAC address is the VF's primary MAC + * @vc_ether_addr: VIRTCHNL structure that contains MAC and type + * + * This function should only be called when the MAC address in + * virtchnl_ether_addr is a valid unicast MAC + */ +static bool +ice_is_vc_addr_primary(struct virtchnl_ether_addr __maybe_unused *vc_ether_addr) +{ + u8 type = ice_vc_ether_addr_type(vc_ether_addr); + + return (type == VIRTCHNL_ETHER_ADDR_PRIMARY); +} + +/** + * ice_vfhw_mac_add - update the VF's cached hardware MAC if allowed + * @vf: VF to update + * @vc_ether_addr: structure from VIRTCHNL with MAC to add + */ +static void +ice_vfhw_mac_add(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr) +{ + u8 *mac_addr = vc_ether_addr->addr; + + if (!is_valid_ether_addr(mac_addr)) + return; + + /* only allow legacy VF drivers to set the device and hardware MAC if it + * is zero and allow new VF drivers to set the hardware MAC if the type + * was correctly specified over VIRTCHNL + */ + if ((ice_is_vc_addr_legacy(vc_ether_addr) && + is_zero_ether_addr(vf->hw_lan_addr.addr)) || + ice_is_vc_addr_primary(vc_ether_addr)) { + ether_addr_copy(vf->dev_lan_addr.addr, mac_addr); + ether_addr_copy(vf->hw_lan_addr.addr, mac_addr); + } + + /* hardware and device MACs are already set, but its possible that the + * VF driver sent the VIRTCHNL_OP_ADD_ETH_ADDR message before the + * VIRTCHNL_OP_DEL_ETH_ADDR when trying to update its MAC, so save it + * away for the legacy VF driver case as it will be updated in the + * delete flow for this case + */ + if (ice_is_vc_addr_legacy(vc_ether_addr)) { + ether_addr_copy(vf->legacy_last_added_umac.addr, + mac_addr); + vf->legacy_last_added_umac.time_modified = jiffies; + } +} + +/** * ice_vc_add_mac_addr - attempt to add the MAC address passed in * @vf: pointer to the VF info * @vsi: pointer to the VF's VSI - * @mac_addr: MAC address to add + * @vc_ether_addr: VIRTCHNL MAC address structure used to add MAC */ static int -ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) +ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, + struct virtchnl_ether_addr *vc_ether_addr) { struct device *dev = ice_pf_to_dev(vf->pf); + u8 *mac_addr = vc_ether_addr->addr; enum ice_status status; - /* default unicast MAC already added */ - if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + /* device MAC already added */ + if (ether_addr_equal(mac_addr, vf->dev_lan_addr.addr)) return 0; if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) { @@ -3698,12 +3780,7 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) return -EIO; } - /* Set the default LAN address to the latest unicast MAC address added - * by the VF. The default LAN address is reported by the PF via - * ndo_get_vf_config. - */ - if (is_unicast_ether_addr(mac_addr)) - ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr); + ice_vfhw_mac_add(vf, vc_ether_addr); vf->num_mac++; @@ -3711,19 +3788,65 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) } /** + * ice_is_legacy_umac_expired - check if last added legacy unicast MAC expired + * @last_added_umac: structure used to check expiration + */ +static bool ice_is_legacy_umac_expired(struct ice_time_mac *last_added_umac) +{ +#define ICE_LEGACY_VF_MAC_CHANGE_EXPIRE_TIME msecs_to_jiffies(3000) + return time_is_before_jiffies(last_added_umac->time_modified + + ICE_LEGACY_VF_MAC_CHANGE_EXPIRE_TIME); +} + +/** + * ice_vfhw_mac_del - update the VF's cached hardware MAC if allowed + * @vf: VF to update + * @vc_ether_addr: structure from VIRTCHNL with MAC to delete + */ +static void +ice_vfhw_mac_del(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr) +{ + u8 *mac_addr = vc_ether_addr->addr; + + if (!is_valid_ether_addr(mac_addr) || + !ether_addr_equal(vf->dev_lan_addr.addr, mac_addr)) + return; + + /* allow the device MAC to be repopulated in the add flow and don't + * clear the hardware MAC (i.e. hw_lan_addr.addr) here as that is meant + * to be persistent on VM reboot and across driver unload/load, which + * won't work if we clear the hardware MAC here + */ + eth_zero_addr(vf->dev_lan_addr.addr); + + /* only update cached hardware MAC for legacy VF drivers on delete + * because we cannot guarantee order/type of MAC from the VF driver + */ + if (ice_is_vc_addr_legacy(vc_ether_addr) && + !ice_is_legacy_umac_expired(&vf->legacy_last_added_umac)) { + ether_addr_copy(vf->dev_lan_addr.addr, + vf->legacy_last_added_umac.addr); + ether_addr_copy(vf->hw_lan_addr.addr, + vf->legacy_last_added_umac.addr); + } +} + +/** * ice_vc_del_mac_addr - attempt to delete the MAC address passed in * @vf: pointer to the VF info * @vsi: pointer to the VF's VSI - * @mac_addr: MAC address to delete + * @vc_ether_addr: VIRTCHNL MAC address structure used to delete MAC */ static int -ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) +ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, + struct virtchnl_ether_addr *vc_ether_addr) { struct device *dev = ice_pf_to_dev(vf->pf); + u8 *mac_addr = vc_ether_addr->addr; enum ice_status status; if (!ice_can_vf_change_mac(vf) && - ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + ether_addr_equal(vf->dev_lan_addr.addr, mac_addr)) return 0; status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI); @@ -3737,8 +3860,7 @@ ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) return -EIO; } - if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) - eth_zero_addr(vf->dflt_lan_addr.addr); + ice_vfhw_mac_del(vf, vc_ether_addr); vf->num_mac--; @@ -3757,7 +3879,8 @@ static int ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) { int (*ice_vc_cfg_mac) - (struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr); + (struct ice_vf *vf, struct ice_vsi *vsi, + struct virtchnl_ether_addr *virtchnl_ether_addr); enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_ether_addr_list *al = (struct virtchnl_ether_addr_list *)msg; @@ -3806,7 +3929,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) is_zero_ether_addr(mac_addr)) continue; - result = ice_vc_cfg_mac(vf, vsi, mac_addr); + result = ice_vc_cfg_mac(vf, vsi, &al->list[i]); if (result == -EEXIST || result == -ENOENT) { continue; } else if (result) { @@ -4444,7 +4567,7 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) return -EBUSY; ivi->vf = vf_id; - ether_addr_copy(ivi->mac, vf->dflt_lan_addr.addr); + ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr); /* VF configuration for VLAN and applicable QoS */ ivi->vlan = vf->port_vlan_info & VLAN_VID_MASK; @@ -4520,7 +4643,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) vf = &pf->vf[vf_id]; /* nothing left to do, unicast MAC already set */ - if (ether_addr_equal(vf->dflt_lan_addr.addr, mac)) + if (ether_addr_equal(vf->dev_lan_addr.addr, mac) && + ether_addr_equal(vf->hw_lan_addr.addr, mac)) return 0; ret = ice_check_vf_ready_for_cfg(vf); @@ -4536,7 +4660,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) /* VF is notified of its new MAC via the PF's response to the * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset */ - ether_addr_copy(vf->dflt_lan_addr.addr, mac); + ether_addr_copy(vf->dev_lan_addr.addr, mac); + ether_addr_copy(vf->hw_lan_addr.addr, mac); if (is_zero_ether_addr(mac)) { /* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */ vf->pf_set_mac = false; @@ -4689,7 +4814,7 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf) dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n", vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id, - vf->dflt_lan_addr.addr, + vf->dev_lan_addr.addr, test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags) ? "on" : "off"); } @@ -4733,7 +4858,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n", vf->mdd_tx_events.count, hw->pf_id, i, - vf->dflt_lan_addr.addr); + vf->dev_lan_addr.addr); } } } @@ -4823,7 +4948,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, if (pf_vsi) dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", - &vf->dflt_lan_addr.addr[0], + &vf->dev_lan_addr.addr[0], pf_vsi->netdev->dev_addr); } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index d800ed83d6c3..842cb077df86 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -58,6 +58,11 @@ enum ice_virtchnl_cap { ICE_VIRTCHNL_VF_CAP_PRIVILEGE, }; +struct ice_time_mac { + unsigned long time_modified; + u8 addr[ETH_ALEN]; +}; + /* VF MDD events print structure */ struct ice_mdd_vf_events { u16 count; /* total count of Rx|Tx events */ @@ -78,7 +83,9 @@ struct ice_vf { struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ struct virtchnl_version_info vf_ver; u32 driver_caps; /* reported by VF driver */ - struct virtchnl_ether_addr dflt_lan_addr; + struct virtchnl_ether_addr dev_lan_addr; + struct virtchnl_ether_addr hw_lan_addr; + struct ice_time_mac legacy_last_added_umac; DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF); DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF); u16 port_vlan_info; /* Port VLAN ID and QoS */ @@ -151,16 +158,18 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen); bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id); #else /* CONFIG_PCI_IOV */ -#define ice_process_vflr_event(pf) do {} while (0) -#define ice_free_vfs(pf) do {} while (0) -#define ice_vc_process_vf_msg(pf, event) do {} while (0) -#define ice_vc_notify_link_state(pf) do {} while (0) -#define ice_vc_notify_reset(pf) do {} while (0) -#define ice_set_vf_state_qs_dis(vf) do {} while (0) -#define ice_vf_lan_overflow_event(pf, event) do {} while (0) -#define ice_print_vfs_mdd_events(pf) do {} while (0) -#define ice_print_vf_rx_mdd_event(vf) do {} while (0) -#define ice_restore_all_vfs_msi_state(pdev) do {} while (0) +static inline void ice_process_vflr_event(struct ice_pf *pf) { } +static inline void ice_free_vfs(struct ice_pf *pf) { } +static inline +void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { } +static inline void ice_vc_notify_link_state(struct ice_pf *pf) { } +static inline void ice_vc_notify_reset(struct ice_pf *pf) { } +static inline void ice_set_vf_state_qs_dis(struct ice_vf *vf) { } +static inline +void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { } +static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { } +static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { } +static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { } static inline bool ice_is_malicious_vf(struct ice_pf __always_unused *pf, diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a1f89ea3c2bd..5a9f61deeb38 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -236,7 +236,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring); } - err = ice_setup_rx_ctx(rx_ring); + err = ice_vsi_cfg_rxq(rx_ring); if (err) goto free_buf; @@ -466,7 +466,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp) struct ice_ring *xdp_ring; u32 act; - rcu_read_lock(); /* ZC patch is enabled only when XDP program is set, * so here it can not be NULL */ @@ -478,7 +477,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp) err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (err) goto out_failure; - rcu_read_unlock(); return ICE_XDP_REDIR; } @@ -503,7 +501,6 @@ out_failure: break; } - rcu_read_unlock(); return result; } @@ -528,7 +525,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) struct sk_buff *skb; u16 stat_err_bits; u16 vlan_tag = 0; - u8 rx_ptype; + u16 rx_ptype; rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index fad783690134..ea208808623a 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -60,7 +60,7 @@ ice_xsk_wakeup(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } -#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0) -#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0) +static inline void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) { } +static inline void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) { } #endif /* CONFIG_XDP_SOCKETS */ #endif /* !_ICE_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 50863fd87d53..cbe92fd23a70 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -2756,6 +2756,7 @@ out: return ret_val; } +#ifdef CONFIG_IGB_HWMON static const u8 e1000_emc_temp_data[4] = { E1000_EMC_INTERNAL_DATA, E1000_EMC_DIODE1_DATA, @@ -2769,7 +2770,6 @@ static const u8 e1000_emc_therm_limit[4] = { E1000_EMC_DIODE3_THERM_LIMIT }; -#ifdef CONFIG_IGB_HWMON /** * igb_get_thermal_sensor_data_generic - Gathers thermal sensor data * @hw: pointer to hardware structure diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 7545da216d8b..636a1b1fb7e1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -831,7 +831,7 @@ static int igb_set_eeprom(struct net_device *netdev, memcpy(ptr, bytes, eeprom->len); for (i = 0; i < last_word - first_word + 1; i++) - eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]); + cpu_to_le16s(&eeprom_buff[i]); ret_val = hw->nvm.ops.write(hw, first_word, last_word - first_word + 1, eeprom_buff); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b2a042f825ff..7e6435dc7e80 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -356,7 +356,7 @@ static void igb_dump(struct igb_adapter *adapter) struct igb_reg_info *reginfo; struct igb_ring *tx_ring; union e1000_adv_tx_desc *tx_desc; - struct my_u0 { u64 a; u64 b; } *u0; + struct my_u0 { __le64 a; __le64 b; } *u0; struct igb_ring *rx_ring; union e1000_adv_rx_desc *rx_desc; u32 staterr; @@ -2643,7 +2643,8 @@ static int igb_parse_cls_flower(struct igb_adapter *adapter, } input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI; - input->filter.vlan_tci = match.key->vlan_priority; + input->filter.vlan_tci = + (__force __be16)match.key->vlan_priority; } } @@ -6275,12 +6276,12 @@ int igb_xmit_xdp_ring(struct igb_adapter *adapter, cmd_type |= len | IGB_TXD_DCMD; tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); - olinfo_status = cpu_to_le32(len << E1000_ADVTXD_PAYLEN_SHIFT); + olinfo_status = len << E1000_ADVTXD_PAYLEN_SHIFT; /* 82575 requires a unique index per ring */ if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) olinfo_status |= tx_ring->reg_idx << 4; - tx_desc->read.olinfo_status = olinfo_status; + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer->bytecount); @@ -8380,7 +8381,6 @@ static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter, struct bpf_prog *xdp_prog; u32 act; - rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (!xdp_prog) @@ -8415,7 +8415,6 @@ out_failure: break; } xdp_out: - rcu_read_unlock(); return ERR_PTR(-result); } @@ -8592,7 +8591,7 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring, if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) - vid = be16_to_cpu(rx_desc->wb.upper.vlan); + vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); else vid = le16_to_cpu(rx_desc->wb.upper.vlan); diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index d68cd4466a54..0011b15e678c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1131,12 +1131,12 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter, | E1000_FTQF_MASK); /* mask all inputs */ ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ - wr32(E1000_IMIR(3), htons(PTP_EV_PORT)); + wr32(E1000_IMIR(3), (__force unsigned int)htons(PTP_EV_PORT)); wr32(E1000_IMIREXT(3), (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); if (hw->mac.type == e1000_82576) { /* enable source port check */ - wr32(E1000_SPQF(3), htons(PTP_EV_PORT)); + wr32(E1000_SPQF(3), (__force unsigned int)htons(PTP_EV_PORT)); ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; } wr32(E1000_FTQF(3), ftqf); diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index fb3fbcb13331..1bbe9862a758 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -83,14 +83,14 @@ static int igbvf_desc_unused(struct igbvf_ring *ring) static void igbvf_receive_skb(struct igbvf_adapter *adapter, struct net_device *netdev, struct sk_buff *skb, - u32 status, u16 vlan) + u32 status, __le16 vlan) { u16 vid; if (status & E1000_RXD_STAT_VP) { if ((adapter->flags & IGBVF_FLAG_RX_LB_VLAN_BSWAP) && (status & E1000_RXDEXT_STATERR_LB)) - vid = be16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK; + vid = be16_to_cpu((__force __be16)vlan) & E1000_RXD_SPC_VLAN_MASK; else vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK; if (test_bit(vid, adapter->active_vlans)) @@ -2056,7 +2056,7 @@ static int igbvf_tso(struct igbvf_ring *tx_ring, /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); /* MSS L4LEN IDX */ mss_l4len_idx = (*hdr_len - l4_offset) << E1000_ADVTXD_L4LEN_SHIFT; diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h index c71b0d7dbcee..ba9bb3132d5d 100644 --- a/drivers/net/ethernet/intel/igbvf/vf.h +++ b/drivers/net/ethernet/intel/igbvf/vf.h @@ -35,31 +35,31 @@ struct e1000_hw; /* Receive Descriptor - Advanced */ union e1000_adv_rx_desc { struct { - u64 pkt_addr; /* Packet buffer address */ - u64 hdr_addr; /* Header buffer address */ + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ } read; struct { struct { union { - u32 data; + __le32 data; struct { - u16 pkt_info; /* RSS/Packet type */ + __le16 pkt_info; /* RSS/Packet type */ /* Split Header, hdr buffer length */ - u16 hdr_info; + __le16 hdr_info; } hs_rss; } lo_dword; union { - u32 rss; /* RSS Hash */ + __le32 rss; /* RSS Hash */ struct { - u16 ip_id; /* IP id */ - u16 csum; /* Packet Checksum */ + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { - u32 status_error; /* ext status/error */ - u16 length; /* Packet length */ - u16 vlan; /* VLAN tag */ + __le32 status_error; /* ext status/error */ + __le16 length; /* Packet length */ + __le16 vlan; /* VLAN tag */ } upper; } wb; /* writeback */ }; @@ -70,14 +70,14 @@ union e1000_adv_rx_desc { /* Transmit Descriptor - Advanced */ union e1000_adv_tx_desc { struct { - u64 buffer_addr; /* Address of descriptor's data buf */ - u32 cmd_type_len; - u32 olinfo_status; + __le64 buffer_addr; /* Address of descriptor's data buf */ + __le32 cmd_type_len; + __le32 olinfo_status; } read; struct { - u64 rsvd; /* Reserved */ - u32 nxtseq_seed; - u32 status; + __le64 rsvd; /* Reserved */ + __le32 nxtseq_seed; + __le32 status; } wb; }; @@ -94,10 +94,10 @@ union e1000_adv_tx_desc { /* Context descriptors */ struct e1000_adv_tx_context_desc { - u32 vlan_macip_lens; - u32 seqnum_seed; - u32 type_tucmd_mlhl; - u32 mss_l4len_idx; + __le32 vlan_macip_lens; + __le32 seqnum_seed; + __le32 type_tucmd_mlhl; + __le32 mss_l4len_idx; }; #define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 25871351730b..9e0bbb2e55e3 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -118,6 +118,7 @@ struct igc_ring { }; struct xdp_rxq_info xdp_rxq; + struct xsk_buff_pool *xsk_pool; } ____cacheline_internodealigned_in_smp; /* Board specific private data structure */ @@ -255,6 +256,11 @@ bool igc_has_link(struct igc_adapter *adapter); void igc_reset(struct igc_adapter *adapter); int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx); void igc_update_stats(struct igc_adapter *adapter); +void igc_disable_rx_ring(struct igc_ring *ring); +void igc_enable_rx_ring(struct igc_ring *ring); +void igc_disable_tx_ring(struct igc_ring *ring); +void igc_enable_tx_ring(struct igc_ring *ring); +int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); /* igc_dump declarations */ void igc_rings_dump(struct igc_adapter *adapter); @@ -366,6 +372,7 @@ extern char igc_driver_name[]; /* VLAN info */ #define IGC_TX_FLAGS_VLAN_MASK 0xffff0000 +#define IGC_TX_FLAGS_VLAN_SHIFT 16 /* igc_test_staterr - tests bits within Rx descriptor status and error fields */ static inline __le32 igc_test_staterr(union igc_adv_rx_desc *rx_desc, @@ -390,8 +397,6 @@ enum igc_tx_flags { /* olinfo flags */ IGC_TX_FLAGS_IPV4 = 0x10, IGC_TX_FLAGS_CSUM = 0x20, - - IGC_TX_FLAGS_XDP = 0x100, }; enum igc_boards { @@ -408,12 +413,19 @@ enum igc_boards { #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) +enum igc_tx_buffer_type { + IGC_TX_BUFFER_TYPE_SKB, + IGC_TX_BUFFER_TYPE_XDP, + IGC_TX_BUFFER_TYPE_XSK, +}; + /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer */ struct igc_tx_buffer { union igc_adv_tx_desc *next_to_watch; unsigned long time_stamp; + enum igc_tx_buffer_type type; union { struct sk_buff *skb; struct xdp_frame *xdpf; @@ -428,14 +440,19 @@ struct igc_tx_buffer { }; struct igc_rx_buffer { - dma_addr_t dma; - struct page *page; + union { + struct { + dma_addr_t dma; + struct page *page; #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; + __u32 page_offset; #else - __u16 page_offset; + __u16 page_offset; #endif - __u16 pagecnt_bias; + __u16 pagecnt_bias; + }; + struct xdp_buff *xdp; + }; }; struct igc_q_vector { @@ -521,7 +538,8 @@ enum igc_ring_flags_t { IGC_RING_FLAG_RX_SCTP_CSUM, IGC_RING_FLAG_RX_LB_VLAN_BSWAP, IGC_RING_FLAG_TX_CTX_IDX, - IGC_RING_FLAG_TX_DETECT_HANG + IGC_RING_FLAG_TX_DETECT_HANG, + IGC_RING_FLAG_AF_XDP_ZC, }; #define ring_uses_large_buffer(ring) \ diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index ea627ce52525..ce530f5fd7bd 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -78,9 +78,11 @@ union igc_adv_rx_desc { /* Additional Transmit Descriptor Control definitions */ #define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ +#define IGC_TXDCTL_SWFLUSH 0x04000000 /* Transmit Software Flush */ /* Additional Receive Descriptor Control definitions */ #define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ +#define IGC_RXDCTL_SWFLUSH 0x04000000 /* Receive Software Flush */ /* SRRCTL bit definitions */ #define IGC_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 0103dda32f39..c3a5a5518790 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -94,12 +94,13 @@ #define IGC_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ #define IGC_CTRL_FRCSPD 0x00000800 /* Force Speed */ #define IGC_CTRL_FRCDPX 0x00001000 /* Force Duplex */ +#define IGC_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ #define IGC_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ #define IGC_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ -#define IGC_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ -#define IGC_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ +#define IGC_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ +#define IGC_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ /* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */ #define MAX_JUMBO_FRAME_SIZE 0x2600 @@ -128,7 +129,6 @@ #define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ /* 1000BASE-T Control Register */ -#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ #define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ #define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ @@ -323,6 +323,9 @@ #define IGC_RXD_STAT_IXSM 0x04 /* Ignore checksum */ #define IGC_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ #define IGC_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ +#define IGC_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ + +#define IGC_RXDEXT_STATERR_LB 0x00040000 /* Advanced Receive Descriptor bit definitions */ #define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ diff --git a/drivers/net/ethernet/intel/igc/igc_dump.c b/drivers/net/ethernet/intel/igc/igc_dump.c index 495bed47ed0a..c09c95cc5f70 100644 --- a/drivers/net/ethernet/intel/igc/igc_dump.c +++ b/drivers/net/ethernet/intel/igc/igc_dump.c @@ -112,7 +112,7 @@ static void igc_regdump(struct igc_hw *hw, struct igc_reg_info *reginfo) void igc_rings_dump(struct igc_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct my_u0 { u64 a; u64 b; } *u0; + struct my_u0 { __le64 a; __le64 b; } *u0; union igc_adv_tx_desc *tx_desc; union igc_adv_rx_desc *rx_desc; struct igc_ring *tx_ring; diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 9722449d7633..fa4171860623 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -554,7 +554,7 @@ static int igc_ethtool_set_eeprom(struct net_device *netdev, memcpy(ptr, bytes, eeprom->len); for (i = 0; i < last_word - first_word + 1; i++) - eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]); + cpu_to_le16s(&eeprom_buff[i]); ret_val = hw->nvm.ops.write(hw, first_word, last_word - first_word + 1, eeprom_buff); @@ -765,35 +765,22 @@ static void igc_ethtool_get_strings(struct net_device *netdev, u32 stringset, IGC_TEST_LEN * ETH_GSTRING_LEN); break; case ETH_SS_STATS: - for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++) { - memcpy(p, igc_gstrings_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < IGC_NETDEV_STATS_LEN; i++) { - memcpy(p, igc_gstrings_net_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++) + ethtool_sprintf(&p, igc_gstrings_stats[i].stat_string); + for (i = 0; i < IGC_NETDEV_STATS_LEN; i++) + ethtool_sprintf(&p, + igc_gstrings_net_stats[i].stat_string); for (i = 0; i < adapter->num_tx_queues; i++) { - sprintf(p, "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_restart", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_queue_%u_packets", i); + ethtool_sprintf(&p, "tx_queue_%u_bytes", i); + ethtool_sprintf(&p, "tx_queue_%u_restart", i); } for (i = 0; i < adapter->num_rx_queues; i++) { - sprintf(p, "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_drops", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_csum_err", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_alloc_failed", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_queue_%u_packets", i); + ethtool_sprintf(&p, "rx_queue_%u_bytes", i); + ethtool_sprintf(&p, "rx_queue_%u_drops", i); + ethtool_sprintf(&p, "rx_queue_%u_csum_err", i); + ethtool_sprintf(&p, "rx_queue_%u_alloc_failed", i); } /* BUG_ON(p - data != IGC_STATS_LEN * ETH_GSTRING_LEN); */ break; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f1adf154ec4a..95323095094d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -11,7 +11,7 @@ #include <linux/pm_runtime.h> #include <net/pkt_sched.h> #include <linux/bpf_trace.h> - +#include <net/xdp_sock_drv.h> #include <net/ipv6.h> #include "igc.h" @@ -111,6 +111,9 @@ void igc_reset(struct igc_adapter *adapter) if (!netif_running(adapter->netdev)) igc_power_down_phy_copper_base(&adapter->hw); + /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */ + wr32(IGC_VET, ETH_P_8021Q); + /* Re-enable PTP, where applicable. */ igc_ptp_reset(adapter); @@ -171,6 +174,14 @@ static void igc_get_hw_control(struct igc_adapter *adapter) ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); } +static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf) +{ + dma_unmap_single(dev, dma_unmap_addr(buf, dma), + dma_unmap_len(buf, len), DMA_TO_DEVICE); + + dma_unmap_len_set(buf, len, 0); +} + /** * igc_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned @@ -179,20 +190,27 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) { u16 i = tx_ring->next_to_clean; struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; + u32 xsk_frames = 0; while (i != tx_ring->next_to_use) { union igc_adv_tx_desc *eop_desc, *tx_desc; - if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP) + switch (tx_buffer->type) { + case IGC_TX_BUFFER_TYPE_XSK: + xsk_frames++; + break; + case IGC_TX_BUFFER_TYPE_XDP: xdp_return_frame(tx_buffer->xdpf); - else + igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); + break; + case IGC_TX_BUFFER_TYPE_SKB: dev_kfree_skb_any(tx_buffer->skb); - - /* unmap skb header data */ - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); + igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); + break; + default: + netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); + break; + } /* check for eop_desc to determine the end of the packet */ eop_desc = tx_buffer->next_to_watch; @@ -211,10 +229,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) /* unmap any remaining paged data */ if (dma_unmap_len(tx_buffer, len)) - dma_unmap_page(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); + igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); } /* move us one more past the eop_desc for start of next pkt */ @@ -226,6 +241,9 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) } } + if (tx_ring->xsk_pool && xsk_frames) + xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); + /* reset BQL for queue */ netdev_tx_reset_queue(txring_txq(tx_ring)); @@ -346,11 +364,7 @@ static int igc_setup_all_tx_resources(struct igc_adapter *adapter) return err; } -/** - * igc_clean_rx_ring - Free Rx Buffers per Queue - * @rx_ring: ring to free buffers from - */ -static void igc_clean_rx_ring(struct igc_ring *rx_ring) +static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring) { u16 i = rx_ring->next_to_clean; @@ -383,12 +397,39 @@ static void igc_clean_rx_ring(struct igc_ring *rx_ring) if (i == rx_ring->count) i = 0; } +} + +static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring) +{ + struct igc_rx_buffer *bi; + u16 i; + + for (i = 0; i < ring->count; i++) { + bi = &ring->rx_buffer_info[i]; + if (!bi->xdp) + continue; + + xsk_buff_free(bi->xdp); + bi->xdp = NULL; + } +} - clear_ring_uses_large_buffer(rx_ring); +/** + * igc_clean_rx_ring - Free Rx Buffers per Queue + * @ring: ring to free buffers from + */ +static void igc_clean_rx_ring(struct igc_ring *ring) +{ + if (ring->xsk_pool) + igc_clean_rx_ring_xsk_pool(ring); + else + igc_clean_rx_ring_page_shared(ring); - rx_ring->next_to_alloc = 0; - rx_ring->next_to_clean = 0; - rx_ring->next_to_use = 0; + clear_ring_uses_large_buffer(ring); + + ring->next_to_alloc = 0; + ring->next_to_clean = 0; + ring->next_to_use = 0; } /** @@ -414,7 +455,7 @@ void igc_free_rx_resources(struct igc_ring *rx_ring) { igc_clean_rx_ring(rx_ring); - igc_xdp_unregister_rxq_info(rx_ring); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; @@ -453,11 +494,16 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) { struct net_device *ndev = rx_ring->netdev; struct device *dev = rx_ring->dev; + u8 index = rx_ring->queue_index; int size, desc_len, res; - res = igc_xdp_register_rxq_info(rx_ring); - if (res < 0) + res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, + rx_ring->q_vector->napi.napi_id); + if (res < 0) { + netdev_err(ndev, "Failed to register xdp_rxq index %u\n", + index); return res; + } size = sizeof(struct igc_rx_buffer) * rx_ring->count; rx_ring->rx_buffer_info = vzalloc(size); @@ -483,7 +529,7 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) return 0; err: - igc_xdp_unregister_rxq_info(rx_ring); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); @@ -515,9 +561,14 @@ static int igc_setup_all_rx_resources(struct igc_adapter *adapter) return err; } -static bool igc_xdp_is_enabled(struct igc_adapter *adapter) +static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter, + struct igc_ring *ring) { - return !!adapter->xdp_prog; + if (!igc_xdp_is_enabled(adapter) || + !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags)) + return NULL; + + return xsk_get_pool_from_qid(ring->netdev, ring->queue_index); } /** @@ -535,6 +586,20 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter, int reg_idx = ring->reg_idx; u32 srrctl = 0, rxdctl = 0; u64 rdba = ring->dma; + u32 buf_size; + + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + ring->xsk_pool = igc_get_xsk_pool(adapter, ring); + if (ring->xsk_pool) { + WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL)); + xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); + } else { + WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL)); + } if (igc_xdp_is_enabled(adapter)) set_ring_uses_large_buffer(ring); @@ -558,12 +623,15 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter, ring->next_to_clean = 0; ring->next_to_use = 0; - /* set descriptor configuration */ - srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT; - if (ring_uses_large_buffer(ring)) - srrctl |= IGC_RXBUFFER_3072 >> IGC_SRRCTL_BSIZEPKT_SHIFT; + if (ring->xsk_pool) + buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool); + else if (ring_uses_large_buffer(ring)) + buf_size = IGC_RXBUFFER_3072; else - srrctl |= IGC_RXBUFFER_2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT; + buf_size = IGC_RXBUFFER_2048; + + srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT; + srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT; srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; wr32(IGC_SRRCTL(reg_idx), srrctl); @@ -618,6 +686,8 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, u64 tdba = ring->dma; u32 txdctl = 0; + ring->xsk_pool = igc_get_xsk_pool(adapter, ring); + /* disable the queue */ wr32(IGC_TXDCTL(reg_idx), 0); wrfl(); @@ -1055,13 +1125,17 @@ static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) -static u32 igc_tx_cmd_type(u32 tx_flags) +static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ u32 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DCMD_IFCS; + /* set HW vlan bit if vlan is present */ + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN, + IGC_ADVTXD_DCMD_VLE); + /* set segmentation bits for TSO */ cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, (IGC_ADVTXD_DCMD_TSE)); @@ -1070,6 +1144,9 @@ static u32 igc_tx_cmd_type(u32 tx_flags) cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, (IGC_ADVTXD_MAC_TSTAMP)); + /* insert frame checksum */ + cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); + return cmd_type; } @@ -1104,8 +1181,9 @@ static int igc_tx_map(struct igc_ring *tx_ring, u16 i = tx_ring->next_to_use; unsigned int data_len, size; dma_addr_t dma; - u32 cmd_type = igc_tx_cmd_type(tx_flags); + u32 cmd_type; + cmd_type = igc_tx_cmd_type(skb, tx_flags); tx_desc = IGC_TX_DESC(tx_ring, i); igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); @@ -1211,11 +1289,7 @@ dma_error: /* clear dma mappings for failed tx_buffer_info map */ while (tx_buffer != first) { if (dma_unmap_len(tx_buffer, len)) - dma_unmap_page(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); + igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); if (i-- == 0) i += tx_ring->count; @@ -1223,11 +1297,7 @@ dma_error: } if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); + igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); dev_kfree_skb_any(tx_buffer->skb); tx_buffer->skb = NULL; @@ -1359,6 +1429,7 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->type = IGC_TX_BUFFER_TYPE_SKB; first->skb = skb; first->bytecount = skb->len; first->gso_segs = 1; @@ -1383,6 +1454,11 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, } } + if (skb_vlan_tag_present(skb)) { + tx_flags |= IGC_TX_FLAGS_VLAN; + tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT); + } + /* record initial flags and protocol */ first->tx_flags = tx_flags; first->protocol = protocol; @@ -1482,6 +1558,25 @@ static inline void igc_rx_hash(struct igc_ring *ring, PKT_HASH_TYPE_L3); } +static void igc_rx_vlan(struct igc_ring *rx_ring, + union igc_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct net_device *dev = rx_ring->netdev; + u16 vid; + + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && + igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) { + if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) && + test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) + vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); + else + vid = le16_to_cpu(rx_desc->wb.upper.vlan); + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } +} + /** * igc_process_skb_fields - Populate skb header fields from Rx descriptor * @rx_ring: rx descriptor ring packet is being transacted on @@ -1500,11 +1595,37 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring, igc_rx_checksum(rx_ring, rx_desc, skb); + igc_rx_vlan(rx_ring, rx_desc, skb); + skb_record_rx_queue(skb, rx_ring->queue_index); skb->protocol = eth_type_trans(skb, rx_ring->netdev); } +static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features) +{ + bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u32 ctrl; + + ctrl = rd32(IGC_CTRL); + + if (enable) { + /* enable VLAN tag insert/strip */ + ctrl |= IGC_CTRL_VME; + } else { + /* disable VLAN tag insert/strip */ + ctrl &= ~IGC_CTRL_VME; + } + wr32(IGC_CTRL, ctrl); +} + +static void igc_restore_vlan(struct igc_adapter *adapter) +{ + igc_vlan_mode(adapter->netdev, adapter->netdev->features); +} + static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, const unsigned int size, int *rx_buffer_pgcnt) @@ -1930,6 +2051,63 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) } } +static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) +{ + union igc_adv_rx_desc *desc; + u16 i = ring->next_to_use; + struct igc_rx_buffer *bi; + dma_addr_t dma; + bool ok = true; + + if (!count) + return ok; + + desc = IGC_RX_DESC(ring, i); + bi = &ring->rx_buffer_info[i]; + i -= ring->count; + + do { + bi->xdp = xsk_buff_alloc(ring->xsk_pool); + if (!bi->xdp) { + ok = false; + break; + } + + dma = xsk_buff_xdp_get_dma(bi->xdp); + desc->read.pkt_addr = cpu_to_le64(dma); + + desc++; + bi++; + i++; + if (unlikely(!i)) { + desc = IGC_RX_DESC(ring, 0); + bi = ring->rx_buffer_info; + i -= ring->count; + } + + /* Clear the length for the next_to_use descriptor. */ + desc->wb.upper.length = 0; + + count--; + } while (count); + + i += ring->count; + + if (ring->next_to_use != i) { + ring->next_to_use = i; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(i, ring->tail); + } + + return ok; +} + static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer, struct xdp_frame *xdpf, struct igc_ring *ring) @@ -1942,8 +2120,8 @@ static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer, return -ENOMEM; } + buffer->type = IGC_TX_BUFFER_TYPE_XDP; buffer->xdpf = xdpf; - buffer->tx_flags = IGC_TX_FLAGS_XDP; buffer->protocol = 0; buffer->bytecount = xdpf->len; buffer->gso_segs = 1; @@ -2025,35 +2203,24 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) return res; } -static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter, - struct xdp_buff *xdp) +/* This function assumes rcu_read_lock() is held by the caller. */ +static int __igc_xdp_run_prog(struct igc_adapter *adapter, + struct bpf_prog *prog, + struct xdp_buff *xdp) { - struct bpf_prog *prog; - int res; - u32 act; + u32 act = bpf_prog_run_xdp(prog, xdp); - rcu_read_lock(); - - prog = READ_ONCE(adapter->xdp_prog); - if (!prog) { - res = IGC_XDP_PASS; - goto unlock; - } - - act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_PASS: - res = IGC_XDP_PASS; - break; + return IGC_XDP_PASS; case XDP_TX: if (igc_xdp_xmit_back(adapter, xdp) < 0) goto out_failure; - res = IGC_XDP_TX; - break; + return IGC_XDP_TX; case XDP_REDIRECT: if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) goto out_failure; - res = IGC_XDP_REDIRECT; + return IGC_XDP_REDIRECT; break; default: bpf_warn_invalid_xdp_action(act); @@ -2063,12 +2230,25 @@ out_failure: trace_xdp_exception(adapter->netdev, prog, act); fallthrough; case XDP_DROP: - res = IGC_XDP_CONSUMED; - break; + return IGC_XDP_CONSUMED; } +} -unlock: - rcu_read_unlock(); +static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter, + struct xdp_buff *xdp) +{ + struct bpf_prog *prog; + int res; + + prog = READ_ONCE(adapter->xdp_prog); + if (!prog) { + res = IGC_XDP_PASS; + goto out; + } + + res = __igc_xdp_run_prog(adapter, prog, xdp); + +out: return ERR_PTR(-res); } @@ -2102,6 +2282,20 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, int status) xdp_do_flush(); } +static void igc_update_rx_stats(struct igc_q_vector *q_vector, + unsigned int packets, unsigned int bytes) +{ + struct igc_ring *ring = q_vector->rx.ring; + + u64_stats_update_begin(&ring->rx_syncp); + ring->rx_stats.packets += packets; + ring->rx_stats.bytes += bytes; + u64_stats_update_end(&ring->rx_syncp); + + q_vector->rx.total_packets += packets; + q_vector->rx.total_bytes += bytes; +} + static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) { unsigned int total_bytes = 0, total_packets = 0; @@ -2150,12 +2344,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } if (!skb) { - xdp.data = pktbuf + pkt_offset; - xdp.data_end = xdp.data + size; - xdp.data_hard_start = pktbuf - igc_rx_offset(rx_ring); - xdp_set_data_meta_invalid(&xdp); - xdp.frame_sz = truesize; - xdp.rxq = &rx_ring->xdp_rxq; + xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq); + xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring), + igc_rx_offset(rx_ring) + pkt_offset, size, false); skb = igc_xdp_run_prog(adapter, &xdp); } @@ -2225,12 +2416,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) /* place incomplete frames back on ring for completion */ rx_ring->skb = skb; - u64_stats_update_begin(&rx_ring->rx_syncp); - rx_ring->rx_stats.packets += total_packets; - rx_ring->rx_stats.bytes += total_bytes; - u64_stats_update_end(&rx_ring->rx_syncp); - q_vector->rx.total_packets += total_packets; - q_vector->rx.total_bytes += total_bytes; + igc_update_rx_stats(q_vector, total_packets, total_bytes); if (cleaned_count) igc_alloc_rx_buffers(rx_ring, cleaned_count); @@ -2238,6 +2424,221 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) return total_packets; } +static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, + struct xdp_buff *xdp) +{ + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; + unsigned int totalsize = metasize + datasize; + struct sk_buff *skb; + + skb = __napi_alloc_skb(&ring->q_vector->napi, + xdp->data_end - xdp->data_hard_start, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); + memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize); + if (metasize) + skb_metadata_set(skb, metasize); + + return skb; +} + +static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, + union igc_adv_rx_desc *desc, + struct xdp_buff *xdp, + ktime_t timestamp) +{ + struct igc_ring *ring = q_vector->rx.ring; + struct sk_buff *skb; + + skb = igc_construct_skb_zc(ring, xdp); + if (!skb) { + ring->rx_stats.alloc_failed++; + return; + } + + if (timestamp) + skb_hwtstamps(skb)->hwtstamp = timestamp; + + if (igc_cleanup_headers(ring, desc, skb)) + return; + + igc_process_skb_fields(ring, desc, skb); + napi_gro_receive(&q_vector->napi, skb); +} + +static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_ring *ring = q_vector->rx.ring; + u16 cleaned_count = igc_desc_unused(ring); + int total_bytes = 0, total_packets = 0; + u16 ntc = ring->next_to_clean; + struct bpf_prog *prog; + bool failure = false; + int xdp_status = 0; + + rcu_read_lock(); + + prog = READ_ONCE(adapter->xdp_prog); + + while (likely(total_packets < budget)) { + union igc_adv_rx_desc *desc; + struct igc_rx_buffer *bi; + ktime_t timestamp = 0; + unsigned int size; + int res; + + desc = IGC_RX_DESC(ring, ntc); + size = le16_to_cpu(desc->wb.upper.length); + if (!size) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * descriptor has been written back + */ + dma_rmb(); + + bi = &ring->rx_buffer_info[ntc]; + + if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { + timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, + bi->xdp->data); + + bi->xdp->data += IGC_TS_HDR_LEN; + + /* HW timestamp has been copied into local variable. Metadata + * length when XDP program is called should be 0. + */ + bi->xdp->data_meta += IGC_TS_HDR_LEN; + size -= IGC_TS_HDR_LEN; + } + + bi->xdp->data_end = bi->xdp->data + size; + xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool); + + res = __igc_xdp_run_prog(adapter, prog, bi->xdp); + switch (res) { + case IGC_XDP_PASS: + igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp); + fallthrough; + case IGC_XDP_CONSUMED: + xsk_buff_free(bi->xdp); + break; + case IGC_XDP_TX: + case IGC_XDP_REDIRECT: + xdp_status |= res; + break; + } + + bi->xdp = NULL; + total_bytes += size; + total_packets++; + cleaned_count++; + ntc++; + if (ntc == ring->count) + ntc = 0; + } + + ring->next_to_clean = ntc; + rcu_read_unlock(); + + if (cleaned_count >= IGC_RX_BUFFER_WRITE) + failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count); + + if (xdp_status) + igc_finalize_xdp(adapter, xdp_status); + + igc_update_rx_stats(q_vector, total_packets, total_bytes); + + if (xsk_uses_need_wakeup(ring->xsk_pool)) { + if (failure || ring->next_to_clean == ring->next_to_use) + xsk_set_rx_need_wakeup(ring->xsk_pool); + else + xsk_clear_rx_need_wakeup(ring->xsk_pool); + return total_packets; + } + + return failure ? budget : total_packets; +} + +static void igc_update_tx_stats(struct igc_q_vector *q_vector, + unsigned int packets, unsigned int bytes) +{ + struct igc_ring *ring = q_vector->tx.ring; + + u64_stats_update_begin(&ring->tx_syncp); + ring->tx_stats.bytes += bytes; + ring->tx_stats.packets += packets; + u64_stats_update_end(&ring->tx_syncp); + + q_vector->tx.total_bytes += bytes; + q_vector->tx.total_packets += packets; +} + +static void igc_xdp_xmit_zc(struct igc_ring *ring) +{ + struct xsk_buff_pool *pool = ring->xsk_pool; + struct netdev_queue *nq = txring_txq(ring); + union igc_adv_tx_desc *tx_desc = NULL; + int cpu = smp_processor_id(); + u16 ntu = ring->next_to_use; + struct xdp_desc xdp_desc; + u16 budget; + + if (!netif_carrier_ok(ring->netdev)) + return; + + __netif_tx_lock(nq, cpu); + + budget = igc_desc_unused(ring); + + while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { + u32 cmd_type, olinfo_status; + struct igc_tx_buffer *bi; + dma_addr_t dma; + + cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | + xdp_desc.len; + olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; + + dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); + xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); + + tx_desc = IGC_TX_DESC(ring, ntu); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + bi = &ring->tx_buffer_info[ntu]; + bi->type = IGC_TX_BUFFER_TYPE_XSK; + bi->protocol = 0; + bi->bytecount = xdp_desc.len; + bi->gso_segs = 1; + bi->time_stamp = jiffies; + bi->next_to_watch = tx_desc; + + netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len); + + ntu++; + if (ntu == ring->count) + ntu = 0; + } + + ring->next_to_use = ntu; + if (tx_desc) { + igc_flush_tx_descriptors(ring); + xsk_tx_release(pool); + } + + __netif_tx_unlock(nq); +} + /** * igc_clean_tx_irq - Reclaim resources after transmit completes * @q_vector: pointer to q_vector containing needed info @@ -2254,6 +2655,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) unsigned int i = tx_ring->next_to_clean; struct igc_tx_buffer *tx_buffer; union igc_adv_tx_desc *tx_desc; + u32 xsk_frames = 0; if (test_bit(__IGC_DOWN, &adapter->state)) return true; @@ -2283,19 +2685,22 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; - if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP) + switch (tx_buffer->type) { + case IGC_TX_BUFFER_TYPE_XSK: + xsk_frames++; + break; + case IGC_TX_BUFFER_TYPE_XDP: xdp_return_frame(tx_buffer->xdpf); - else + igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); + break; + case IGC_TX_BUFFER_TYPE_SKB: napi_consume_skb(tx_buffer->skb, napi_budget); - - /* unmap skb header data */ - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - - /* clear tx_buffer data */ - dma_unmap_len_set(tx_buffer, len, 0); + igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); + break; + default: + netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); + break; + } /* clear last DMA location and unmap remaining buffers */ while (tx_desc != eop_desc) { @@ -2309,13 +2714,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) } /* unmap any remaining paged data */ - if (dma_unmap_len(tx_buffer, len)) { - dma_unmap_page(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); - } + if (dma_unmap_len(tx_buffer, len)) + igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); } /* move us one more past the eop_desc for start of next pkt */ @@ -2340,12 +2740,16 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) i += tx_ring->count; tx_ring->next_to_clean = i; - u64_stats_update_begin(&tx_ring->tx_syncp); - tx_ring->tx_stats.bytes += total_bytes; - tx_ring->tx_stats.packets += total_packets; - u64_stats_update_end(&tx_ring->tx_syncp); - q_vector->tx.total_bytes += total_bytes; - q_vector->tx.total_packets += total_packets; + + igc_update_tx_stats(q_vector, total_packets, total_bytes); + + if (tx_ring->xsk_pool) { + if (xsk_frames) + xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); + if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) + xsk_set_tx_need_wakeup(tx_ring->xsk_pool); + igc_xdp_xmit_zc(tx_ring); + } if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { struct igc_hw *hw = &adapter->hw; @@ -2906,6 +3310,8 @@ static void igc_configure(struct igc_adapter *adapter) igc_get_hw_control(adapter); igc_set_rx_mode(netdev); + igc_restore_vlan(adapter); + igc_setup_tctl(adapter); igc_setup_mrqc(adapter); igc_setup_rctl(adapter); @@ -2925,7 +3331,10 @@ static void igc_configure(struct igc_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct igc_ring *ring = adapter->rx_ring[i]; - igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); + if (ring->xsk_pool) + igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); + else + igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); } } @@ -3540,14 +3949,17 @@ static int igc_poll(struct napi_struct *napi, int budget) struct igc_q_vector *q_vector = container_of(napi, struct igc_q_vector, napi); + struct igc_ring *rx_ring = q_vector->rx.ring; bool clean_complete = true; int work_done = 0; if (q_vector->tx.ring) clean_complete = igc_clean_tx_irq(q_vector, budget); - if (q_vector->rx.ring) { - int cleaned = igc_clean_rx_irq(q_vector, budget); + if (rx_ring) { + int cleaned = rx_ring->xsk_pool ? + igc_clean_rx_irq_zc(q_vector, budget) : + igc_clean_rx_irq(q_vector, budget); work_done += cleaned; if (cleaned >= budget) @@ -4199,6 +4611,9 @@ static int igc_set_features(struct net_device *netdev, netdev_features_t changed = netdev->features ^ features; struct igc_adapter *adapter = netdev_priv(netdev); + if (changed & NETIF_F_HW_VLAN_CTAG_RX) + igc_vlan_mode(netdev, features); + /* Add VLAN support */ if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) return 0; @@ -5185,6 +5600,9 @@ static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) switch (bpf->command) { case XDP_SETUP_PROG: return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); + case XDP_SETUP_XSK_POOL: + return igc_xdp_setup_pool(adapter, bpf->xsk.pool, + bpf->xsk.queue_id); default: return -EOPNOTSUPP; } @@ -5230,6 +5648,43 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, return num_frames - drops; } +static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, + struct igc_q_vector *q_vector) +{ + struct igc_hw *hw = &adapter->hw; + u32 eics = 0; + + eics |= q_vector->eims_value; + wr32(IGC_EICS, eics); +} + +int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) +{ + struct igc_adapter *adapter = netdev_priv(dev); + struct igc_q_vector *q_vector; + struct igc_ring *ring; + + if (test_bit(__IGC_DOWN, &adapter->state)) + return -ENETDOWN; + + if (!igc_xdp_is_enabled(adapter)) + return -ENXIO; + + if (queue_id >= adapter->num_rx_queues) + return -EINVAL; + + ring = adapter->rx_ring[queue_id]; + + if (!ring->xsk_pool) + return -ENXIO; + + q_vector = adapter->q_vector[queue_id]; + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + igc_trigger_rxtxq_interrupt(adapter, q_vector); + + return 0; +} + static const struct net_device_ops igc_netdev_ops = { .ndo_open = igc_open, .ndo_stop = igc_close, @@ -5245,6 +5700,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_setup_tc = igc_setup_tc, .ndo_bpf = igc_bpf, .ndo_xdp_xmit = igc_xdp_xmit, + .ndo_xsk_wakeup = igc_xsk_wakeup, }; /* PCIe configuration access */ @@ -5484,11 +5940,15 @@ static int igc_probe(struct pci_dev *pdev, /* copy netdev features into list of user selectable features */ netdev->hw_features |= NETIF_F_NTUPLE; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= netdev->features; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + netdev->vlan_features |= netdev->features; + /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; @@ -5997,6 +6457,61 @@ struct net_device *igc_get_hw_dev(struct igc_hw *hw) return adapter->netdev; } +static void igc_disable_rx_ring_hw(struct igc_ring *ring) +{ + struct igc_hw *hw = &ring->q_vector->adapter->hw; + u8 idx = ring->reg_idx; + u32 rxdctl; + + rxdctl = rd32(IGC_RXDCTL(idx)); + rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE; + rxdctl |= IGC_RXDCTL_SWFLUSH; + wr32(IGC_RXDCTL(idx), rxdctl); +} + +void igc_disable_rx_ring(struct igc_ring *ring) +{ + igc_disable_rx_ring_hw(ring); + igc_clean_rx_ring(ring); +} + +void igc_enable_rx_ring(struct igc_ring *ring) +{ + struct igc_adapter *adapter = ring->q_vector->adapter; + + igc_configure_rx_ring(adapter, ring); + + if (ring->xsk_pool) + igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); + else + igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); +} + +static void igc_disable_tx_ring_hw(struct igc_ring *ring) +{ + struct igc_hw *hw = &ring->q_vector->adapter->hw; + u8 idx = ring->reg_idx; + u32 txdctl; + + txdctl = rd32(IGC_TXDCTL(idx)); + txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; + txdctl |= IGC_TXDCTL_SWFLUSH; + wr32(IGC_TXDCTL(idx), txdctl); +} + +void igc_disable_tx_ring(struct igc_ring *ring) +{ + igc_disable_tx_ring_hw(ring); + igc_clean_tx_ring(ring); +} + +void igc_enable_tx_ring(struct igc_ring *ring) +{ + struct igc_adapter *adapter = ring->q_vector->adapter; + + igc_configure_tx_ring(adapter, ring); +} + /** * igc_init_module - Driver Registration Routine * diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index cc174853554b..0f82990567d9 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -10,8 +10,8 @@ #define IGC_EECD 0x00010 /* EEPROM/Flash Control - RW */ #define IGC_CTRL_EXT 0x00018 /* Extended Device Control - RW */ #define IGC_MDIC 0x00020 /* MDI Control - RW */ -#define IGC_MDICNFG 0x00E04 /* MDC/MDIO Configuration - RW */ #define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ +#define IGC_VET 0x00038 /* VLAN Ether Type - RW */ #define IGC_I225_PHPM 0x00E14 /* I225 PHY Power Management */ #define IGC_GPHY_VERSION 0x0001E /* I225 gPHY Firmware Version */ diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c index 11133c4619bb..a8cf5374be47 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.c +++ b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020, Intel Corporation. */ +#include <net/xdp_sock_drv.h> + #include "igc.h" #include "igc_xdp.h" @@ -32,29 +34,112 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, return 0; } -int igc_xdp_register_rxq_info(struct igc_ring *ring) +static int igc_xdp_enable_pool(struct igc_adapter *adapter, + struct xsk_buff_pool *pool, u16 queue_id) { - struct net_device *dev = ring->netdev; + struct net_device *ndev = adapter->netdev; + struct device *dev = &adapter->pdev->dev; + struct igc_ring *rx_ring, *tx_ring; + struct napi_struct *napi; + bool needs_reset; + u32 frame_size; int err; - err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, ring->queue_index, 0); - if (err) { - netdev_err(dev, "Failed to register xdp rxq info\n"); - return err; + if (queue_id >= adapter->num_rx_queues || + queue_id >= adapter->num_tx_queues) + return -EINVAL; + + frame_size = xsk_pool_get_rx_frame_size(pool); + if (frame_size < ETH_FRAME_LEN + VLAN_HLEN * 2) { + /* When XDP is enabled, the driver doesn't support frames that + * span over multiple buffers. To avoid that, we check if xsk + * frame size is big enough to fit the max ethernet frame size + * + vlan double tagging. + */ + return -EOPNOTSUPP; } - err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, - NULL); + err = xsk_pool_dma_map(pool, dev, IGC_RX_DMA_ATTR); if (err) { - netdev_err(dev, "Failed to register xdp rxq mem model\n"); - xdp_rxq_info_unreg(&ring->xdp_rxq); + netdev_err(ndev, "Failed to map xsk pool\n"); return err; } + needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter); + + rx_ring = adapter->rx_ring[queue_id]; + tx_ring = adapter->tx_ring[queue_id]; + /* Rx and Tx rings share the same napi context. */ + napi = &rx_ring->q_vector->napi; + + if (needs_reset) { + igc_disable_rx_ring(rx_ring); + igc_disable_tx_ring(tx_ring); + napi_disable(napi); + } + + set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags); + set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags); + + if (needs_reset) { + napi_enable(napi); + igc_enable_rx_ring(rx_ring); + igc_enable_tx_ring(tx_ring); + + err = igc_xsk_wakeup(ndev, queue_id, XDP_WAKEUP_RX); + if (err) { + xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR); + return err; + } + } + + return 0; +} + +static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id) +{ + struct igc_ring *rx_ring, *tx_ring; + struct xsk_buff_pool *pool; + struct napi_struct *napi; + bool needs_reset; + + if (queue_id >= adapter->num_rx_queues || + queue_id >= adapter->num_tx_queues) + return -EINVAL; + + pool = xsk_get_pool_from_qid(adapter->netdev, queue_id); + if (!pool) + return -EINVAL; + + needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter); + + rx_ring = adapter->rx_ring[queue_id]; + tx_ring = adapter->tx_ring[queue_id]; + /* Rx and Tx rings share the same napi context. */ + napi = &rx_ring->q_vector->napi; + + if (needs_reset) { + igc_disable_rx_ring(rx_ring); + igc_disable_tx_ring(tx_ring); + napi_disable(napi); + } + + xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR); + clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags); + clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags); + + if (needs_reset) { + napi_enable(napi); + igc_enable_rx_ring(rx_ring); + igc_enable_tx_ring(tx_ring); + } + return 0; } -void igc_xdp_unregister_rxq_info(struct igc_ring *ring) +int igc_xdp_setup_pool(struct igc_adapter *adapter, struct xsk_buff_pool *pool, + u16 queue_id) { - xdp_rxq_info_unreg(&ring->xdp_rxq); + return pool ? igc_xdp_enable_pool(adapter, pool, queue_id) : + igc_xdp_disable_pool(adapter, queue_id); } diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h index cfecb515b718..a74e5487d199 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.h +++ b/drivers/net/ethernet/intel/igc/igc_xdp.h @@ -6,8 +6,12 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, struct netlink_ext_ack *extack); +int igc_xdp_setup_pool(struct igc_adapter *adapter, struct xsk_buff_pool *pool, + u16 queue_id); -int igc_xdp_register_rxq_info(struct igc_ring *ring); -void igc_xdp_unregister_rxq_info(struct igc_ring *ring); +static inline bool igc_xdp_is_enabled(struct igc_adapter *adapter) +{ + return !!adapter->xdp_prog; +} #endif /* _IGC_XDP_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index e324e42fab2d..58ea959a4482 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -1514,8 +1514,7 @@ static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask) #define IXGBE_WRITE_REG_BE32(a, reg, value) \ IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(ntohl(value))) -#define IXGBE_STORE_AS_BE16(_value) \ - ntohs(((u16)(_value) >> 8) | ((u16)(_value) << 8)) +#define IXGBE_STORE_AS_BE16(_value) __swab16(ntohs((_value))) s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, union ixgbe_atr_input *input_mask) @@ -1651,13 +1650,13 @@ s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw, IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]); /* record source and destination port (little-endian)*/ - fdirport = ntohs(input->formatted.dst_port); + fdirport = be16_to_cpu(input->formatted.dst_port); fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT; - fdirport |= ntohs(input->formatted.src_port); + fdirport |= be16_to_cpu(input->formatted.src_port); IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport); /* record vlan (little-endian) and flex_bytes(big-endian) */ - fdirvlan = IXGBE_STORE_AS_BE16((__force u16)input->formatted.flex_bytes); + fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes); fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT; fdirvlan |= ntohs(input->formatted.vlan_id); IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 03ccbe6b66d2..e90b5047e695 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -3678,10 +3678,8 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, bool return_data) { u32 hdr_size = sizeof(struct ixgbe_hic_hdr); - union { - struct ixgbe_hic_hdr hdr; - u32 u32arr[1]; - } *bp = buffer; + struct ixgbe_hic_hdr *hdr = buffer; + u32 *u32arr = buffer; u16 buf_len, dword_len; s32 status; u32 bi; @@ -3707,12 +3705,12 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, /* first pull in the header so we know the buffer length */ for (bi = 0; bi < dword_len; bi++) { - bp->u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); - le32_to_cpus(&bp->u32arr[bi]); + u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); + le32_to_cpus(&u32arr[bi]); } /* If there is any thing in data position pull it in */ - buf_len = bp->hdr.buf_len; + buf_len = hdr->buf_len; if (!buf_len) goto rel_out; @@ -3727,8 +3725,8 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, /* Pull in the rest of the buffer (bi is where we left off) */ for (; bi <= dword_len; bi++) { - bp->u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); - le32_to_cpus(&bp->u32arr[bi]); + u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); + le32_to_cpus(&u32arr[bi]); } rel_out: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 54d47265a7ac..e596e1a9fc75 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -511,14 +511,14 @@ static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs) continue; reg = IXGBE_READ_REG(hw, MIPAF_ARR(3, i)); - if (reg == xs->id.daddr.a4) + if (reg == (__force u32)xs->id.daddr.a4) return 1; } } if ((bmcipval & BMCIP_MASK) == BMCIP_V4) { reg = IXGBE_READ_REG(hw, IXGBE_BMCIP(3)); - if (reg == xs->id.daddr.a4) + if (reg == (__force u32)xs->id.daddr.a4) return 1; } @@ -533,7 +533,7 @@ static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs) for (j = 0; j < 4; j++) { reg = IXGBE_READ_REG(hw, MIPAF_ARR(i, j)); - if (reg != xs->id.daddr.a6[j]) + if (reg != (__force u32)xs->id.daddr.a6[j]) break; } if (j == 4) /* did we match all 4 words? */ @@ -543,7 +543,7 @@ static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs) if ((bmcipval & BMCIP_MASK) == BMCIP_V6) { for (j = 0; j < 4; j++) { reg = IXGBE_READ_REG(hw, IXGBE_BMCIP(j)); - if (reg != xs->id.daddr.a6[j]) + if (reg != (__force u32)xs->id.daddr.a6[j]) break; } if (j == 4) /* did we match all 4 words? */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2ac5b82676f3..ffff69efd78a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2199,7 +2199,6 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, struct xdp_frame *xdpf; u32 act; - rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (!xdp_prog) @@ -2237,7 +2236,6 @@ out_failure: break; } xdp_out: - rcu_read_unlock(); return ERR_PTR(-result); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index f72d2978263b..96dd1a4f956a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -100,7 +100,6 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, struct xdp_frame *xdpf; u32 act; - rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_prog); act = bpf_prog_run_xdp(xdp_prog, xdp); @@ -108,7 +107,6 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (err) goto out_failure; - rcu_read_unlock(); return IXGBE_XDP_REDIR; } @@ -134,7 +132,6 @@ out_failure: result = IXGBE_XDP_CONSUMED; break; } - rcu_read_unlock(); return result; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0e733cc15c58..c714e1ecd308 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1054,7 +1054,6 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, struct bpf_prog *xdp_prog; u32 act; - rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (!xdp_prog) @@ -1082,7 +1081,6 @@ out_failure: break; } xdp_out: - rcu_read_unlock(); return ERR_PTR(-result); } @@ -3817,7 +3815,7 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); /* update gso size and bytecount with header size */ first->gso_segs = skb_shinfo(skb)->gso_segs; |