From ccab434e674ca95d483788b1895a70c21b7f016a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Nov 2023 11:08:57 +0100 Subject: usb: aqc111: check packet for fixup for true limit If a device sends a packet that is inbetween 0 and sizeof(u64) the value passed to skb_trim() as length will wrap around ending up as some very large value. The driver will then proceed to parse the header located at that position, which will either oops or process some random value. The fix is to check against sizeof(u64) rather than 0, which the driver currently does. The issue exists since the introduction of the driver. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/aqc111.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index a017e9de2119..7b8afa589a53 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1079,17 +1079,17 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb) u16 pkt_count = 0; u64 desc_hdr = 0; u16 vlan_tag = 0; - u32 skb_len = 0; + u32 skb_len; if (!skb) goto err; - if (skb->len == 0) + skb_len = skb->len; + if (skb_len < sizeof(desc_hdr)) goto err; - skb_len = skb->len; /* RX Descriptor Header */ - skb_trim(skb, skb->len - sizeof(desc_hdr)); + skb_trim(skb, skb_len - sizeof(desc_hdr)); desc_hdr = le64_to_cpup((u64 *)skb_tail_pointer(skb)); /* Check these packets */ -- cgit v1.2.3 From 7fbd5fc2b35a8f559a6b380dfa9bcd964a758186 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 15 Nov 2023 11:53:31 +0100 Subject: stmmac: dwmac-loongson: Add architecture dependency Only present the DWMAC_LOONGSON option on architectures where it can actually be used. This follows the same logic as the DWMAC_INTEL option. Signed-off-by: Jean Delvare Cc: Keguang Zhang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index a2b9e289aa36..85dcda51df05 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -280,7 +280,7 @@ config DWMAC_INTEL config DWMAC_LOONGSON tristate "Loongson PCI DWMAC support" default MACH_LOONGSON64 - depends on STMMAC_ETH && PCI + depends on (MACH_LOONGSON64 || COMPILE_TEST) && STMMAC_ETH && PCI depends on COMMON_CLK help This selects the LOONGSON PCI bus support for the stmmac driver, -- cgit v1.2.3 From d565fa4300d9ebd5ba3bbd259ce841f8dab609d6 Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Wed, 15 Nov 2023 16:59:58 +0100 Subject: s390/ism: ism driver implies smc protocol Since commit a72178cfe855 ("net/smc: Fix dependency of SMC on ISM") you can build the ism code without selecting the SMC network protocol. That leaves some ism functions be reported as unused. Move these functions under the conditional compile with CONFIG_SMC. Also codify the suggestion to also configure the SMC protocol in ism's Kconfig - but with an "imply" rather than a "select" as SMC depends on other config options and allow for a deliberate decision not to build SMC. Also, mention that in ISM's help. Fixes: a72178cfe855 ("net/smc: Fix dependency of SMC on ISM") Reported-by: Randy Dunlap Closes: https://lore.kernel.org/netdev/afd142a2-1fa0-46b9-8b2d-7652d41d3ab8@infradead.org/ Signed-off-by: Gerd Bayer Reviewed-by: Wenjia Zhang Reviewed-by: Simon Horman Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Signed-off-by: David S. Miller --- drivers/s390/net/Kconfig | 3 +- drivers/s390/net/ism_drv.c | 93 +++++++++++++++++++++++----------------------- 2 files changed, 48 insertions(+), 48 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index 4902d45e929c..c61e6427384c 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -103,10 +103,11 @@ config CCWGROUP config ISM tristate "Support for ISM vPCI Adapter" depends on PCI + imply SMC default n help Select this option if you want to use the Internal Shared Memory - vPCI Adapter. + vPCI Adapter. The adapter can be used with the SMC network protocol. To compile as a module choose M. The module name is ism. If unsure, choose N. diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 6df7f377d2f9..81aabbfbbe2c 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -30,7 +30,6 @@ static const struct pci_device_id ism_device_table[] = { MODULE_DEVICE_TABLE(pci, ism_device_table); static debug_info_t *ism_debug_info; -static const struct smcd_ops ism_ops; #define NO_CLIENT 0xff /* must be >= MAX_CLIENTS */ static struct ism_client *clients[MAX_CLIENTS]; /* use an array rather than */ @@ -289,22 +288,6 @@ out: return ret; } -static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid, - u32 vid) -{ - union ism_query_rgid cmd; - - memset(&cmd, 0, sizeof(cmd)); - cmd.request.hdr.cmd = ISM_QUERY_RGID; - cmd.request.hdr.len = sizeof(cmd.request); - - cmd.request.rgid = rgid; - cmd.request.vlan_valid = vid_valid; - cmd.request.vlan_id = vid; - - return ism_cmd(ism, &cmd); -} - static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb) { clear_bit(dmb->sba_idx, ism->sba_bitmap); @@ -429,23 +412,6 @@ static int ism_del_vlan_id(struct ism_dev *ism, u64 vlan_id) return ism_cmd(ism, &cmd); } -static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq, - u32 event_code, u64 info) -{ - union ism_sig_ieq cmd; - - memset(&cmd, 0, sizeof(cmd)); - cmd.request.hdr.cmd = ISM_SIGNAL_IEQ; - cmd.request.hdr.len = sizeof(cmd.request); - - cmd.request.rgid = rgid; - cmd.request.trigger_irq = trigger_irq; - cmd.request.event_code = event_code; - cmd.request.info = info; - - return ism_cmd(ism, &cmd); -} - static unsigned int max_bytes(unsigned int start, unsigned int len, unsigned int boundary) { @@ -503,14 +469,6 @@ u8 *ism_get_seid(void) } EXPORT_SYMBOL_GPL(ism_get_seid); -static u16 ism_get_chid(struct ism_dev *ism) -{ - if (!ism || !ism->pdev) - return 0; - - return to_zpci(ism->pdev)->pchid; -} - static void ism_handle_event(struct ism_dev *ism) { struct ism_event *entry; @@ -569,11 +527,6 @@ static irqreturn_t ism_handle_irq(int irq, void *data) return IRQ_HANDLED; } -static u64 ism_get_local_gid(struct ism_dev *ism) -{ - return ism->local_gid; -} - static int ism_dev_init(struct ism_dev *ism) { struct pci_dev *pdev = ism->pdev; @@ -774,6 +727,22 @@ module_exit(ism_exit); /*************************** SMC-D Implementation *****************************/ #if IS_ENABLED(CONFIG_SMC) +static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid, + u32 vid) +{ + union ism_query_rgid cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.request.hdr.cmd = ISM_QUERY_RGID; + cmd.request.hdr.len = sizeof(cmd.request); + + cmd.request.rgid = rgid; + cmd.request.vlan_valid = vid_valid; + cmd.request.vlan_id = vid; + + return ism_cmd(ism, &cmd); +} + static int smcd_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid, u32 vid) { @@ -811,6 +780,23 @@ static int smcd_reset_vlan_required(struct smcd_dev *smcd) return ism_cmd_simple(smcd->priv, ISM_RESET_VLAN); } +static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq, + u32 event_code, u64 info) +{ + union ism_sig_ieq cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.request.hdr.cmd = ISM_SIGNAL_IEQ; + cmd.request.hdr.len = sizeof(cmd.request); + + cmd.request.rgid = rgid; + cmd.request.trigger_irq = trigger_irq; + cmd.request.event_code = event_code; + cmd.request.info = info; + + return ism_cmd(ism, &cmd); +} + static int smcd_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq, u32 event_code, u64 info) { @@ -830,11 +816,24 @@ static int smcd_supports_v2(void) SYSTEM_EID.type[0] != '0'; } +static u64 ism_get_local_gid(struct ism_dev *ism) +{ + return ism->local_gid; +} + static u64 smcd_get_local_gid(struct smcd_dev *smcd) { return ism_get_local_gid(smcd->priv); } +static u16 ism_get_chid(struct ism_dev *ism) +{ + if (!ism || !ism->pdev) + return 0; + + return to_zpci(ism->pdev)->pchid; +} + static u16 smcd_get_chid(struct smcd_dev *smcd) { return ism_get_chid(smcd->priv); -- cgit v1.2.3 From 8ba2c459668cfe2aaacc5ebcd35b4b9ef8643013 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Fri, 17 Nov 2023 18:11:08 +0800 Subject: net: wangxun: fix kernel panic due to null pointer When the device uses a custom subsystem vendor ID, the function wx_sw_init() returns before the memory of 'wx->mac_table' is allocated. The null pointer will causes the kernel panic. Fixes: 79625f45ca73 ("net: wangxun: Move MAC address handling to libwx") Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 8 +++++--- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 4 +--- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 4 +--- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index a3c5de9d547a..533e912af089 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1769,10 +1769,12 @@ int wx_sw_init(struct wx *wx) wx->subsystem_device_id = pdev->subsystem_device; } else { err = wx_flash_read_dword(wx, 0xfffdc, &ssid); - if (!err) - wx->subsystem_device_id = swab16((u16)ssid); + if (err < 0) { + wx_err(wx, "read of internal subsystem device id failed\n"); + return err; + } - return err; + wx->subsystem_device_id = swab16((u16)ssid); } wx->mac_table = kcalloc(wx->mac.num_rar_entries, diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 3d43f808c86b..8db804543e66 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -121,10 +121,8 @@ static int ngbe_sw_init(struct wx *wx) /* PCI config space info */ err = wx_sw_init(wx); - if (err < 0) { - wx_err(wx, "read of internal subsystem device id failed\n"); + if (err < 0) return err; - } /* mac type, phy type , oem type */ ngbe_init_type_code(wx); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 70f0b5c01dac..526250102db2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -364,10 +364,8 @@ static int txgbe_sw_init(struct wx *wx) /* PCI config space info */ err = wx_sw_init(wx); - if (err < 0) { - wx_err(wx, "read of internal subsystem device id failed\n"); + if (err < 0) return err; - } txgbe_init_type_code(wx); -- cgit v1.2.3 From 93da8d75a66568ba4bb5b14ad2833acd7304cd02 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Nov 2023 14:17:33 +0000 Subject: wireguard: use DEV_STATS_INC() wg_xmit() can be called concurrently, KCSAN reported [1] some device stats updates can be lost. Use DEV_STATS_INC() for this unlikely case. [1] BUG: KCSAN: data-race in wg_xmit / wg_xmit read-write to 0xffff888104239160 of 8 bytes by task 1375 on cpu 0: wg_xmit+0x60f/0x680 drivers/net/wireguard/device.c:231 __netdev_start_xmit include/linux/netdevice.h:4918 [inline] netdev_start_xmit include/linux/netdevice.h:4932 [inline] xmit_one net/core/dev.c:3543 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3559 ... read-write to 0xffff888104239160 of 8 bytes by task 1378 on cpu 1: wg_xmit+0x60f/0x680 drivers/net/wireguard/device.c:231 __netdev_start_xmit include/linux/netdevice.h:4918 [inline] netdev_start_xmit include/linux/netdevice.h:4932 [inline] xmit_one net/core/dev.c:3543 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3559 ... v2: also change wg_packet_consume_data_done() (Hangbin Liu) and wg_packet_purge_staged_packets() Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jason A. Donenfeld Cc: Hangbin Liu Signed-off-by: Jason A. Donenfeld Reviewed-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/wireguard/device.c | 4 ++-- drivers/net/wireguard/receive.c | 12 ++++++------ drivers/net/wireguard/send.c | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 258dcc103921..deb9636b0ecf 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -210,7 +210,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) */ while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) { dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); - ++dev->stats.tx_dropped; + DEV_STATS_INC(dev, tx_dropped); } skb_queue_splice_tail(&packets, &peer->staged_packet_queue); spin_unlock_bh(&peer->staged_packet_queue.lock); @@ -228,7 +228,7 @@ err_icmp: else if (skb->protocol == htons(ETH_P_IPV6)) icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); err: - ++dev->stats.tx_errors; + DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return ret; } diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 0b3f0c843550..a176653c8861 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -416,20 +416,20 @@ dishonest_packet_peer: net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", dev->name, skb, peer->internal_id, &peer->endpoint.addr); - ++dev->stats.rx_errors; - ++dev->stats.rx_frame_errors; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_frame_errors); goto packet_processed; dishonest_packet_type: net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr); - ++dev->stats.rx_errors; - ++dev->stats.rx_frame_errors; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_frame_errors); goto packet_processed; dishonest_packet_size: net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr); - ++dev->stats.rx_errors; - ++dev->stats.rx_length_errors; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_length_errors); goto packet_processed; packet_processed: dev_kfree_skb(skb); diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 95c853b59e1d..0d48e0f4a1ba 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -333,7 +333,8 @@ err: void wg_packet_purge_staged_packets(struct wg_peer *peer) { spin_lock_bh(&peer->staged_packet_queue.lock); - peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; + DEV_STATS_ADD(peer->device->dev, tx_dropped, + peer->staged_packet_queue.qlen); __skb_queue_purge(&peer->staged_packet_queue); spin_unlock_bh(&peer->staged_packet_queue.lock); } -- cgit v1.2.3 From 5f228d7c8a539714c1e9b7e7534f76bb7979f268 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Fri, 17 Nov 2023 16:10:18 +0530 Subject: octeontx2-pf: Fix memory leak during interface down During 'ifconfig down' one RSS memory was not getting freed. This patch fixes the same. Fixes: 81a4362016e7 ("octeontx2-pf: Add RSS multi group support") Signed-off-by: Suman Ghosh Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 91b99fd70361..ba95ac913274 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1934,6 +1934,8 @@ int otx2_stop(struct net_device *netdev) /* Clear RSS enable flag */ rss = &pf->hw.rss_info; rss->enable = false; + if (!netif_is_rxfh_configured(netdev)) + kfree(rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP]); /* Cleanup Queue IRQ */ vec = pci_irq_vector(pf->pdev, -- cgit v1.2.3 From 79e0c5be8c73a674c92bd4ba77b75f4f8c91d32e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:13 +0100 Subject: net, vrf: Move dstats structure to core Just move struct pcpu_dstats out of the vrf into the core, and streamline the field names slightly, so they better align with the {t,l}stats ones. No functional change otherwise. A conversion of the u64s to u64_stats_t could be done at a separate point in future. This move is needed as we are moving the {t,l,d}stats allocation/freeing to the core. Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Cc: Jakub Kicinski Cc: David Ahern Link: https://lore.kernel.org/r/20231114004220.6495-2-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/vrf.c | 24 +++++++----------------- include/linux/netdevice.h | 10 ++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index db766941b78f..3e6e0fdc3ba7 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -121,22 +121,12 @@ struct net_vrf { int ifindex; }; -struct pcpu_dstats { - u64 tx_pkts; - u64 tx_bytes; - u64 tx_drps; - u64 rx_pkts; - u64 rx_bytes; - u64 rx_drps; - struct u64_stats_sync syncp; -}; - static void vrf_rx_stats(struct net_device *dev, int len) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->rx_pkts++; + dstats->rx_packets++; dstats->rx_bytes += len; u64_stats_update_end(&dstats->syncp); } @@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev, do { start = u64_stats_fetch_begin(&dstats->syncp); tbytes = dstats->tx_bytes; - tpkts = dstats->tx_pkts; - tdrops = dstats->tx_drps; + tpkts = dstats->tx_packets; + tdrops = dstats->tx_drops; rbytes = dstats->rx_bytes; - rpkts = dstats->rx_pkts; + rpkts = dstats->rx_packets; } while (u64_stats_fetch_retry(&dstats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpkts; @@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) vrf_rx_stats(dev, len); else - this_cpu_inc(dev->dstats->rx_drps); + this_cpu_inc(dev->dstats->rx_drops); return NETDEV_TX_OK; } @@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->tx_pkts++; + dstats->tx_packets++; dstats->tx_bytes += len; u64_stats_update_end(&dstats->syncp); } else { - this_cpu_inc(dev->dstats->tx_drps); + this_cpu_inc(dev->dstats->tx_drops); } return ret; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a16c9cc063fe..98082113156e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2755,6 +2755,16 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); +struct pcpu_dstats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + struct u64_stats_sync syncp; +} __aligned(8 * sizeof(u64)); + struct pcpu_lstats { u64_stats_t packets; u64_stats_t bytes; -- cgit v1.2.3 From 34d21de99cea9cb17967874313e5b0262527833c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:14 +0100 Subject: net: Move {l,t,d}stats allocation to core and convert veth & vrf Move {l,t,d}stats allocation to the core and let netdevs pick the stats type they need. That way the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc) - all happening in the core. Co-developed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Cc: David Ahern Link: https://lore.kernel.org/r/20231114004220.6495-3-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/veth.c | 16 ++-------------- drivers/net/vrf.c | 14 +++----------- include/linux/netdevice.h | 20 +++++++++++++++---- net/core/dev.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 69 insertions(+), 30 deletions(-) (limited to 'drivers') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 9980517ed8b0..ac030c241d1a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1506,25 +1506,12 @@ static void veth_free_queues(struct net_device *dev) static int veth_dev_init(struct net_device *dev) { - int err; - - dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); - if (!dev->lstats) - return -ENOMEM; - - err = veth_alloc_queues(dev); - if (err) { - free_percpu(dev->lstats); - return err; - } - - return 0; + return veth_alloc_queues(dev); } static void veth_dev_free(struct net_device *dev) { veth_free_queues(dev); - free_percpu(dev->lstats); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1796,6 +1783,7 @@ static void veth_setup(struct net_device *dev) NETIF_F_HW_VLAN_STAG_RX); dev->needs_free_netdev = true; dev->priv_destructor = veth_dev_free; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; dev->max_mtu = ETH_MAX_MTU; dev->hw_features = VETH_FEATURES; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3e6e0fdc3ba7..bb95ce43cd97 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1164,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev) vrf_rtable_release(dev, vrf); vrf_rt6_release(dev, vrf); - - free_percpu(dev->dstats); - dev->dstats = NULL; } static int vrf_dev_init(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); - dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); - if (!dev->dstats) - goto out_nomem; - /* create the default dst which points back to us */ if (vrf_rtable_create(dev) != 0) - goto out_stats; + goto out_nomem; if (vrf_rt6_create(dev) != 0) goto out_rth; @@ -1193,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev) out_rth: vrf_rtable_release(dev, vrf); -out_stats: - free_percpu(dev->dstats); - dev->dstats = NULL; out_nomem: return -ENOMEM; } @@ -1694,6 +1684,8 @@ static void vrf_setup(struct net_device *dev) dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU; dev->mtu = dev->max_mtu; + + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 98082113156e..2564e209465e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1797,6 +1797,13 @@ enum netdev_ml_priv_type { ML_PRIV_CAN, }; +enum netdev_stat_type { + NETDEV_PCPU_STAT_NONE, + NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ + NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ + NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ +}; + /** * struct net_device - The DEVICE structure. * @@ -1991,10 +1998,14 @@ enum netdev_ml_priv_type { * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type - * @lstats: Loopback statistics - * @tstats: Tunnel statistics - * @dstats: Dummy statistics - * @vstats: Virtual ethernet statistics + * + * @pcpu_stat_type: Type of device statistics which the core should + * allocate/free: none, lstats, tstats, dstats. none + * means the driver is handling statistics allocation/ + * freeing internally. + * @lstats: Loopback statistics: packets, bytes + * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes + * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP @@ -2354,6 +2365,7 @@ struct net_device { void *ml_priv; enum netdev_ml_priv_type ml_priv_type; + enum netdev_stat_type pcpu_stat_type:8; union { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; diff --git a/net/core/dev.c b/net/core/dev.c index af53f6d838ce..0cc6e283edba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10051,6 +10051,46 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); +static int netdev_do_alloc_pcpu_stats(struct net_device *dev) +{ + void __percpu *v; + + switch (dev->pcpu_stat_type) { + case NETDEV_PCPU_STAT_NONE: + return 0; + case NETDEV_PCPU_STAT_LSTATS: + v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); + break; + case NETDEV_PCPU_STAT_TSTATS: + v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + break; + case NETDEV_PCPU_STAT_DSTATS: + v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); + break; + default: + return -EINVAL; + } + + return v ? 0 : -ENOMEM; +} + +static void netdev_do_free_pcpu_stats(struct net_device *dev) +{ + switch (dev->pcpu_stat_type) { + case NETDEV_PCPU_STAT_NONE: + return; + case NETDEV_PCPU_STAT_LSTATS: + free_percpu(dev->lstats); + break; + case NETDEV_PCPU_STAT_TSTATS: + free_percpu(dev->tstats); + break; + case NETDEV_PCPU_STAT_DSTATS: + free_percpu(dev->dstats); + break; + } +} + /** * register_netdevice() - register a network device * @dev: device to register @@ -10111,9 +10151,13 @@ int register_netdevice(struct net_device *dev) goto err_uninit; } + ret = netdev_do_alloc_pcpu_stats(dev); + if (ret) + goto err_uninit; + ret = dev_index_reserve(net, dev->ifindex); if (ret < 0) - goto err_uninit; + goto err_free_pcpu; dev->ifindex = ret; /* Transfer changeable features to wanted_features and enable @@ -10219,6 +10263,8 @@ err_uninit_notify: call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); err_ifindex_release: dev_index_release(net, dev->ifindex); +err_free_pcpu: + netdev_do_free_pcpu_stats(dev); err_uninit: if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); @@ -10471,6 +10517,7 @@ void netdev_run_todo(void) WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); + netdev_do_free_pcpu_stats(dev); if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) -- cgit v1.2.3 From ae1658272c6491a31ac968e39882fc569f312ac3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:15 +0100 Subject: netkit: Add tstats per-CPU traffic counters Add dev->tstats traffic accounting to netkit. The latter contains per-CPU RX and TX counters. The dev's TX counters are bumped upon pass/unspec as well as redirect verdicts, in other words, on everything except for drops. The dev's RX counters are bumped upon successful __netif_rx(), as well as from skb_do_redirect() (not part of this commit here). Using dev->lstats with having just a single packets/bytes counter and inferring one another's RX counters from the peer dev's lstats is not possible given skb_do_redirect() can also bump the device's stats. Signed-off-by: Daniel Borkmann Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20231114004220.6495-4-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/netkit.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 5a0f86f38f09..99de11f9cde5 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -68,6 +68,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_t ret_dev = NET_XMIT_SUCCESS; const struct bpf_mprog_entry *entry; struct net_device *peer; + int len = skb->len; rcu_read_lock(); peer = rcu_dereference(nk->peer); @@ -85,15 +86,22 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) case NETKIT_PASS: skb->protocol = eth_type_trans(skb, skb->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - __netif_rx(skb); + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { + dev_sw_netstats_tx_add(dev, 1, len); + dev_sw_netstats_rx_add(peer, len); + } else { + goto drop_stats; + } break; case NETKIT_REDIRECT: + dev_sw_netstats_tx_add(dev, 1, len); skb_do_redirect(skb); break; case NETKIT_DROP: default: drop: kfree_skb(skb); +drop_stats: dev_core_stats_tx_dropped_inc(dev); ret_dev = NET_XMIT_DROP; break; @@ -174,6 +182,13 @@ static struct net_device *netkit_peer_dev(struct net_device *dev) return rcu_dereference(netkit_priv(dev)->peer); } +static void netkit_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + dev_fetch_sw_netstats(stats, dev->tstats); + stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); +} + static void netkit_uninit(struct net_device *dev); static const struct net_device_ops netkit_netdev_ops = { @@ -184,6 +199,7 @@ static const struct net_device_ops netkit_netdev_ops = { .ndo_set_rx_headroom = netkit_set_headroom, .ndo_get_iflink = netkit_get_iflink, .ndo_get_peer_dev = netkit_peer_dev, + .ndo_get_stats64 = netkit_get_stats, .ndo_uninit = netkit_uninit, .ndo_features_check = passthru_features_check, }; @@ -218,6 +234,7 @@ static void netkit_setup(struct net_device *dev) ether_setup(dev); dev->max_mtu = ETH_MAX_MTU; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->flags |= IFF_NOARP; dev->priv_flags &= ~IFF_TX_SKB_SHARING; -- cgit v1.2.3 From 6f2684bf2b4460c84d0d34612a939f78b96b03fc Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 14 Nov 2023 01:42:16 +0100 Subject: veth: Use tstats per-CPU traffic counters Currently veth devices use the lstats per-CPU traffic counters, which only cover TX traffic. veth_get_stats64() actually populates RX stats of a veth device from its peer's TX counters, based on the assumption that a veth device can _only_ receive packets from its peer, which is no longer true: For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in a different netns), skipping veth's peer device. Unfortunately, this kind of traffic isn't currently accounted for in veth's RX stats. In preparation for the fix, use tstats (instead of lstats) to maintain both RX and TX counters for each veth device. We'll use RX counters for bpf_redirect_peer() traffic, and keep using TX counters for the usual "peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by _adding_ RX count to peer's TX count, in order to cover both kinds of traffic. veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()") for less confusion, but let's leave it to another patch to keep the fix minimal. Signed-off-by: Peilin Ye Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20231114004220.6495-5-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/veth.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index ac030c241d1a..6cc352296c67 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { if (!use_napi) - dev_lstats_add(dev, length); + dev_sw_netstats_tx_add(dev, 1, length); else __veth_xdp_flush(rq); } else { @@ -387,14 +387,6 @@ drop: return ret; } -static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) -{ - struct veth_priv *priv = netdev_priv(dev); - - dev_lstats_read(dev, packets, bytes); - return atomic64_read(&priv->dropped); -} - static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev, struct veth_priv *priv = netdev_priv(dev); struct net_device *peer; struct veth_stats rx; - u64 packets, bytes; - tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); - tot->tx_bytes = bytes; - tot->tx_packets = packets; + tot->tx_dropped = atomic64_read(&priv->dropped); + dev_fetch_sw_netstats(tot, dev->tstats); veth_stats_rx(&rx, dev); tot->tx_dropped += rx.xdp_tx_err; tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; - tot->rx_bytes = rx.xdp_bytes; - tot->rx_packets = rx.xdp_packets; + tot->rx_bytes += rx.xdp_bytes; + tot->rx_packets += rx.xdp_packets; rcu_read_lock(); peer = rcu_dereference(priv->peer); if (peer) { - veth_stats_tx(peer, &packets, &bytes); - tot->rx_bytes += bytes; - tot->rx_packets += packets; + struct rtnl_link_stats64 tot_peer = {}; + + dev_fetch_sw_netstats(&tot_peer, peer->tstats); + tot->rx_bytes += tot_peer.tx_bytes; + tot->rx_packets += tot_peer.tx_packets; veth_stats_rx(&rx, peer); tot->tx_dropped += rx.peer_tq_xdp_xmit_err; @@ -1783,7 +1775,7 @@ static void veth_setup(struct net_device *dev) NETIF_F_HW_VLAN_STAG_RX); dev->needs_free_netdev = true; dev->priv_destructor = veth_dev_free; - dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->max_mtu = ETH_MAX_MTU; dev->hw_features = VETH_FEATURES; -- cgit v1.2.3 From 2c225425704078282e152ba692649237f78b3d7a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:18 +0100 Subject: bpf, netkit: Add indirect call wrapper for fetching peer dev ndo_get_peer_dev is used in tcx BPF fast path, therefore make use of indirect call wrapper and therefore optimize the bpf_redirect_peer() internal handling a bit. Add a small skb_get_peer_dev() wrapper which utilizes the INDIRECT_CALL_1() macro instead of open coding. Future work could potentially add a peer pointer directly into struct net_device in future and convert veth and netkit over to use it so that eventually ndo_get_peer_dev can be removed. Co-developed-by: Nikolay Aleksandrov Signed-off-by: Nikolay Aleksandrov Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231114004220.6495-7-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/netkit.c | 3 ++- include/net/netkit.h | 6 ++++++ net/core/filter.c | 18 +++++++++++++----- 3 files changed, 21 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 99de11f9cde5..97bd6705c241 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -177,7 +178,7 @@ out: rcu_read_unlock(); } -static struct net_device *netkit_peer_dev(struct net_device *dev) +INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev) { return rcu_dereference(netkit_priv(dev)->peer); } diff --git a/include/net/netkit.h b/include/net/netkit.h index 0ba2e6b847ca..9ec0163739f4 100644 --- a/include/net/netkit.h +++ b/include/net/netkit.h @@ -10,6 +10,7 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev)); #else static inline int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -34,5 +35,10 @@ static inline int netkit_prog_query(const union bpf_attr *attr, { return -EINVAL; } + +static inline struct net_device *netkit_peer_dev(struct net_device *dev) +{ + return NULL; +} #endif /* CONFIG_NETKIT */ #endif /* __NET_NETKIT_H */ diff --git a/net/core/filter.c b/net/core/filter.c index cca810987c8d..7e4d7c3bcc84 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include "dev.h" @@ -2468,6 +2469,16 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); +static struct net_device *skb_get_peer_dev(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (likely(ops->ndo_get_peer_dev)) + return INDIRECT_CALL_1(ops->ndo_get_peer_dev, + netkit_peer_dev, dev); + return NULL; +} + int skb_do_redirect(struct sk_buff *skb) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); @@ -2481,12 +2492,9 @@ int skb_do_redirect(struct sk_buff *skb) if (unlikely(!dev)) goto out_drop; if (flags & BPF_F_PEER) { - const struct net_device_ops *ops = dev->netdev_ops; - - if (unlikely(!ops->ndo_get_peer_dev || - !skb_at_tc_ingress(skb))) + if (unlikely(!skb_at_tc_ingress(skb))) goto out_drop; - dev = ops->ndo_get_peer_dev(dev); + dev = skb_get_peer_dev(dev); if (unlikely(!dev || !(dev->flags & IFF_UP) || net_eq(net, dev_net(dev)))) -- cgit v1.2.3 From d30fb712e52964f2cf9a9c14cf67078394044837 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Sun, 19 Nov 2023 08:23:41 -0800 Subject: hv_netvsc: fix race of netvsc and VF register_netdevice The rtnl lock also needs to be held before rndis_filter_device_add() which advertises nvsp_2_vsc_capability / sriov bit, and triggers VF NIC offering and registering. If VF NIC finished register_netdev() earlier it may cause name based config failure. To fix this issue, move the call to rtnl_lock() before rndis_filter_device_add(), so VF will be registered later than netvsc / synthetic NIC, and gets a name numbered (ethX) after netvsc. Cc: stable@vger.kernel.org Fixes: e04e7a7bbd4b ("hv_netvsc: Fix a deadlock by getting rtnl lock earlier in netvsc_probe()") Reported-by: Dexuan Cui Signed-off-by: Haiyang Zhang Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Reviewed-by: Dexuan Cui Signed-off-by: Paolo Abeni --- drivers/net/hyperv/netvsc_drv.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3ba3c8fb28a5..5e528a76f5f5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2531,15 +2531,6 @@ static int netvsc_probe(struct hv_device *dev, goto devinfo_failed; } - nvdev = rndis_filter_device_add(dev, device_info); - if (IS_ERR(nvdev)) { - ret = PTR_ERR(nvdev); - netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - goto rndis_failed; - } - - eth_hw_addr_set(net, device_info->mac_adr); - /* We must get rtnl lock before scheduling nvdev->subchan_work, * otherwise netvsc_subchan_work() can get rtnl lock first and wait * all subchannels to show up, but that may not happen because @@ -2547,9 +2538,23 @@ static int netvsc_probe(struct hv_device *dev, * -> ... -> device_add() -> ... -> __device_attach() can't get * the device lock, so all the subchannels can't be processed -- * finally netvsc_subchan_work() hangs forever. + * + * The rtnl lock also needs to be held before rndis_filter_device_add() + * which advertises nvsp_2_vsc_capability / sriov bit, and triggers + * VF NIC offering and registering. If VF NIC finished register_netdev() + * earlier it may cause name based config failure. */ rtnl_lock(); + nvdev = rndis_filter_device_add(dev, device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); + netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); + goto rndis_failed; + } + + eth_hw_addr_set(net, device_info->mac_adr); + if (nvdev->num_chn > 1) schedule_work(&nvdev->subchan_work); @@ -2586,9 +2591,9 @@ static int netvsc_probe(struct hv_device *dev, return 0; register_failed: - rtnl_unlock(); rndis_filter_device_remove(dev, nvdev); rndis_failed: + rtnl_unlock(); netvsc_devinfo_put(device_info); devinfo_failed: free_percpu(net_device_ctx->vf_stats); -- cgit v1.2.3 From 85520856466ed6bc3b1ccb013cddac70ceb437db Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Sun, 19 Nov 2023 08:23:42 -0800 Subject: hv_netvsc: Fix race of register_netdevice_notifier and VF register If VF NIC is registered earlier, NETDEV_REGISTER event is replayed, but NETDEV_POST_INIT is not. Move register_netdevice_notifier() earlier, so the call back function is set before probing. Cc: stable@vger.kernel.org Fixes: e04e7a7bbd4b ("hv_netvsc: Fix a deadlock by getting rtnl lock earlier in netvsc_probe()") Reported-by: Dexuan Cui Signed-off-by: Haiyang Zhang Reviewed-by: Wojciech Drewek Reviewed-by: Dexuan Cui Signed-off-by: Paolo Abeni --- drivers/net/hyperv/netvsc_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5e528a76f5f5..b7dfd51f09e6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2793,12 +2793,17 @@ static int __init netvsc_drv_init(void) } netvsc_ring_bytes = ring_size * PAGE_SIZE; + register_netdevice_notifier(&netvsc_netdev_notifier); + ret = vmbus_driver_register(&netvsc_drv); if (ret) - return ret; + goto err_vmbus_reg; - register_netdevice_notifier(&netvsc_netdev_notifier); return 0; + +err_vmbus_reg: + unregister_netdevice_notifier(&netvsc_netdev_notifier); + return ret; } MODULE_LICENSE("GPL"); -- cgit v1.2.3 From c807d6cd089d2f4951baa838081ec5ae3e2360f8 Mon Sep 17 00:00:00 2001 From: Long Li Date: Sun, 19 Nov 2023 08:23:43 -0800 Subject: hv_netvsc: Mark VF as slave before exposing it to user-mode When a VF is being exposed form the kernel, it should be marked as "slave" before exposing to the user-mode. The VF is not usable without netvsc running as master. The user-mode should never see a VF without the "slave" flag. This commit moves the code of setting the slave flag to the time before VF is exposed to user-mode. Cc: stable@vger.kernel.org Fixes: 0c195567a8f6 ("netvsc: transparent VF management") Signed-off-by: Long Li Signed-off-by: Haiyang Zhang Acked-by: Stephen Hemminger Acked-by: Dexuan Cui Signed-off-by: Paolo Abeni --- drivers/net/hyperv/netvsc_drv.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index b7dfd51f09e6..706ea5263e87 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2206,9 +2206,6 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto upper_link_failed; } - /* set slave flag before open to prevent IPv6 addrconf */ - vf_netdev->flags |= IFF_SLAVE; - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); @@ -2315,16 +2312,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) } - /* Fallback path to check synthetic vf with - * help of mac addr + /* Fallback path to check synthetic vf with help of mac addr. + * Because this function can be called before vf_netdev is + * initialized (NETDEV_POST_INIT) when its perm_addr has not been copied + * from dev_addr, also try to match to its dev_addr. + * Note: On Hyper-V and Azure, it's not possible to set a MAC address + * on a VF that matches to the MAC of a unrelated NETVSC device. */ list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { ndev = hv_get_drvdata(ndev_ctx->device_ctx); - if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) { - netdev_notice(vf_netdev, - "falling back to mac addr based matching\n"); + if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr) || + ether_addr_equal(vf_netdev->dev_addr, ndev->perm_addr)) return ndev; - } } netdev_notice(vf_netdev, @@ -2332,6 +2331,19 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) return NULL; } +static int netvsc_prepare_bonding(struct net_device *vf_netdev) +{ + struct net_device *ndev; + + ndev = get_netvsc_byslot(vf_netdev); + if (!ndev) + return NOTIFY_DONE; + + /* set slave flag before open to prevent IPv6 addrconf */ + vf_netdev->flags |= IFF_SLAVE; + return NOTIFY_DONE; +} + static int netvsc_register_vf(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; @@ -2758,6 +2770,8 @@ static int netvsc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; switch (event) { + case NETDEV_POST_INIT: + return netvsc_prepare_bonding(event_dev); case NETDEV_REGISTER: return netvsc_register_vf(event_dev); case NETDEV_UNREGISTER: -- cgit v1.2.3 From 0739af07d1d947af27c877f797cb82ceee702515 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Mon, 20 Nov 2023 13:06:29 +0100 Subject: net: usb: ax88179_178a: fix failed operations during ax88179_reset Using generic ASIX Electronics Corp. AX88179 Gigabit Ethernet device, the following test cycle has been implemented: - power on - check logs - shutdown - after detecting the system shutdown, disconnect power - after approximately 60 seconds of sleep, power is restored Running some cycles, sometimes error logs like this appear: kernel: ax88179_178a 2-9:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0001: -19 kernel: ax88179_178a 2-9:1.0 (unnamed net_device) (uninitialized): Failed to read reg index 0x0001: -19 ... These failed operation are happening during ax88179_reset execution, so the initialization could not be correct. In order to avoid this, we need to increase the delay after reset and clock initial operations. By using these larger values, many cycles have been run and no failed operations appear. It would be better to check some status register to verify when the operation has finished, but I do not have found any available information (neither in the public datasheets nor in the manufacturer's driver). The only available information for the necessary delays is the maufacturer's driver (original values) but the proposed values are not enough for the tested devices. Fixes: e2ca90c276e1f ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Reported-by: Herb Wei Tested-by: Herb Wei Signed-off-by: Jose Ignacio Tornos Martinez Link: https://lore.kernel.org/r/20231120120642.54334-1-jtornosm@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/ax88179_178a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index aff39bf3161d..4ea0e155bb0d 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1583,11 +1583,11 @@ static int ax88179_reset(struct usbnet *dev) *tmp16 = AX_PHYPWR_RSTCTL_IPRL; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); - msleep(200); + msleep(500); *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); - msleep(100); + msleep(200); /* Ethernet PHY Auto Detach*/ ax88179_auto_detach(dev); -- cgit v1.2.3 From b6fe6f03716da246b453369f98a553d4ab21447c Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Tue, 21 Nov 2023 09:37:09 +0800 Subject: dpll: Fix potential msg memleak when genlmsg_put_reply failed We should clean the skb resource if genlmsg_put_reply failed. Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Signed-off-by: Hao Ge Reviewed-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20231121013709.73323-1-gehao@kylinos.cn Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_netlink.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index a6dc3997bf5c..442a0ebeb953 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -1093,9 +1093,10 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, DPLL_CMD_PIN_ID_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(msg); return -EMSGSIZE; - + } pin = dpll_pin_find_from_nlattr(info); if (!IS_ERR(pin)) { ret = dpll_msg_add_pin_handle(msg, pin); @@ -1123,8 +1124,10 @@ int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, DPLL_CMD_PIN_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(msg); return -EMSGSIZE; + } ret = dpll_cmd_pin_get_one(msg, pin, info->extack); if (ret) { nlmsg_free(msg); @@ -1256,8 +1259,10 @@ int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, DPLL_CMD_DEVICE_ID_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(msg); return -EMSGSIZE; + } dpll = dpll_device_find_from_nlattr(info); if (!IS_ERR(dpll)) { @@ -1284,8 +1289,10 @@ int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, DPLL_CMD_DEVICE_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(msg); return -EMSGSIZE; + } ret = dpll_device_get_one(dpll, msg, info->extack); if (ret) { -- cgit v1.2.3 From 84d2db91f14a32dc856a5972e3f0907089093c7a Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Tue, 21 Nov 2023 15:53:57 +0800 Subject: nfc: virtual_ncidev: Add variable to check if ndev is running syzbot reported an memory leak that happens when an skb is add to send_buff after virtual nci closed. This patch adds a variable to track if the ndev is running before handling new skb in send function. Signed-off-by: Nguyen Dinh Phi Reported-by: syzbot+6eb09d75211863f15e3e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/00000000000075472b06007df4fb@google.com Reviewed-by: Bongsu Jeon Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/virtual_ncidev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c index b027be0b0b6f..590b038e449e 100644 --- a/drivers/nfc/virtual_ncidev.c +++ b/drivers/nfc/virtual_ncidev.c @@ -26,10 +26,14 @@ struct virtual_nci_dev { struct mutex mtx; struct sk_buff *send_buff; struct wait_queue_head wq; + bool running; }; static int virtual_nci_open(struct nci_dev *ndev) { + struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); + + vdev->running = true; return 0; } @@ -40,6 +44,7 @@ static int virtual_nci_close(struct nci_dev *ndev) mutex_lock(&vdev->mtx); kfree_skb(vdev->send_buff); vdev->send_buff = NULL; + vdev->running = false; mutex_unlock(&vdev->mtx); return 0; @@ -50,7 +55,7 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); mutex_lock(&vdev->mtx); - if (vdev->send_buff) { + if (vdev->send_buff || !vdev->running) { mutex_unlock(&vdev->mtx); kfree_skb(skb); return -1; -- cgit v1.2.3 From 6a26310273c323380da21eb23fcfd50e31140913 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 21 Nov 2023 09:09:33 +0100 Subject: Revert "net: r8169: Disable multicast filter for RTL8168H and RTL8107E" This reverts commit efa5f1311c4998e9e6317c52bc5ee93b3a0f36df. I couldn't reproduce the reported issue. What I did, based on a pcap packet log provided by the reporter: - Used same chip version (RTL8168h) - Set MAC address to the one used on the reporters system - Replayed the EAPOL unicast packet that, according to the reporter, was filtered out by the mc filter. The packet was properly received. Therefore the root cause of the reported issue seems to be somewhere else. Disabling mc filtering completely for the most common chip version is a quite big hammer. Therefore revert the change and wait for further analysis results from the reporter. Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index b9bb1d2f0237..295366a85c63 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2599,9 +2599,7 @@ static void rtl_set_rx_mode(struct net_device *dev) rx_mode &= ~AcceptMulticast; } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || dev->flags & IFF_ALLMULTI || - tp->mac_version == RTL_GIGA_MAC_VER_35 || - tp->mac_version == RTL_GIGA_MAC_VER_46 || - tp->mac_version == RTL_GIGA_MAC_VER_48) { + tp->mac_version == RTL_GIGA_MAC_VER_35) { /* accept all multicasts */ } else if (netdev_mc_empty(dev)) { rx_mode &= ~AcceptMulticast; -- cgit v1.2.3 From 99360d9620f09fb8bc15548d855011bbb198c680 Mon Sep 17 00:00:00 2001 From: Lech Perczak Date: Sat, 18 Nov 2023 00:19:18 +0100 Subject: net: usb: qmi_wwan: claim interface 4 for ZTE MF290 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Interface 4 is used by for QMI interface in stock firmware of MF28D, the router which uses MF290 modem. Rebind it to qmi_wwan after freeing it up from option driver. The proper configuration is: Interface mapping is: 0: QCDM, 1: (unknown), 2: AT (PCUI), 2: AT (Modem), 4: QMI T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=19d2 ProdID=0189 Rev= 0.00 S: Manufacturer=ZTE, Incorporated S: Product=ZTE LTE Technologies MSM C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms Cc: Bjørn Mork Signed-off-by: Lech Perczak Link: https://lore.kernel.org/r/20231117231918.100278-3-lech.perczak@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 344af3c5c836..e2e181378f41 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1289,6 +1289,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0168, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0176, 3)}, {QMI_FIXED_INTF(0x19d2, 0x0178, 3)}, + {QMI_FIXED_INTF(0x19d2, 0x0189, 4)}, /* ZTE MF290 */ {QMI_FIXED_INTF(0x19d2, 0x0191, 4)}, /* ZTE EuFi890 */ {QMI_FIXED_INTF(0x19d2, 0x0199, 1)}, /* ZTE MF820S */ {QMI_FIXED_INTF(0x19d2, 0x0200, 1)}, -- cgit v1.2.3 From 4aa1d8f89b10cdc25a231dabf808d8935e0b137a Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 21 Nov 2023 22:26:24 +0530 Subject: octeontx2-pf: Fix ntuple rule creation to direct packet to VF with higher Rx queue than its PF It is possible to add a ntuple rule which would like to direct packet to a VF whose number of queues are greater/less than its PF's queue numbers. For example a PF can have 2 Rx queues but a VF created on that PF can have 8 Rx queues. As of today, ntuple rule will reject rule because it is checking the requested queue number against PF's number of Rx queues. As a part of this fix if the action of a ntuple rule is to move a packet to a VF's queue then the check is removed. Also, a debug information is printed to aware user that it is user's responsibility to cross check if the requested queue number on that VF is a valid one. Fixes: f0a1913f8a6f ("octeontx2-pf: Add support for ethtool ntuple filters") Signed-off-by: Suman Ghosh Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231121165624.3664182-1-sumang@marvell.com Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/nic/otx2_flows.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 4762dbea64a1..97a71e9b8563 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -1088,6 +1088,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) struct ethhdr *eth_hdr; bool new = false; int err = 0; + u64 vf_num; u32 ring; if (!flow_cfg->max_flows) { @@ -1100,7 +1101,21 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT)) return -ENOMEM; - if (ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC) + /* Number of queues on a VF can be greater or less than + * the PF's queue. Hence no need to check for the + * queue count. Hence no need to check queue count if PF + * is installing for its VF. Below is the expected vf_num value + * based on the ethtool commands. + * + * e.g. + * 1. ethtool -U ... action -1 ==> vf_num:255 + * 2. ethtool -U ... action ==> vf_num:0 + * 3. ethtool -U ... vf queue ==> + * vf_num:vf_idx+1 + */ + vf_num = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie); + if (!is_otx2_vf(pfvf->pcifunc) && !vf_num && + ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC) return -EINVAL; if (fsp->location >= otx2_get_maxflows(flow_cfg)) @@ -1182,6 +1197,9 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) flow_cfg->nr_flows++; } + if (flow->is_vf) + netdev_info(pfvf->netdev, + "Make sure that VF's queue number is within its queue limit\n"); return 0; } -- cgit v1.2.3 From 818ad9cc90d4a7165caaee7e32800c50d0564ec3 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 21 Nov 2023 20:08:44 +0100 Subject: net: veth: fix ethtool stats reporting Fix a possible misalignment between page_pool stats and tx xdp_stats reported in veth_get_ethtool_stats routine. The issue can be reproduced configuring the veth pair with the following tx/rx queues: $ip link add v0 numtxqueues 2 numrxqueues 4 type veth peer name v1 \ numtxqueues 1 numrxqueues 1 and loading a simple XDP program on v0 that just returns XDP_PASS. In this case on v0 the page_pool stats overwrites tx xdp_stats for queue 1. Fix the issue incrementing pp_idx of dev->real_num_tx_queues * VETH_TQ_STATS_LEN since we always report xdp_stats for all tx queues in ethtool. Fixes: 4fc418053ec7 ("net: veth: add page_pool stats") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/c5b5d0485016836448453f12846c7c4ab75b094a.1700593593.git.lorenzo@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/veth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 6cc352296c67..57efb3454c57 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -236,8 +236,8 @@ static void veth_get_ethtool_stats(struct net_device *dev, data[tx_idx + j] += *(u64 *)(base + offset); } } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); - pp_idx = tx_idx + VETH_TQ_STATS_LEN; } + pp_idx = idx + dev->real_num_tx_queues * VETH_TQ_STATS_LEN; page_pool_stats: veth_get_page_pool_stats(dev, &data[pp_idx]); -- cgit v1.2.3 From 676ec53844cbdf2f47e68a076cdff7f0ec6cbe3f Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 22 Nov 2023 00:44:33 +0530 Subject: amd-xgbe: handle corner-case during sfp hotplug Force the mode change for SFI in Fixed PHY configurations. Fixed PHY configurations needs PLL to be enabled while doing mode set. When the SFP module isn't connected during boot, driver assumes AN is ON and attempts auto-negotiation. However, if the connected SFP comes up in Fixed PHY configuration the link will not come up as PLL isn't enabled while the initial mode set command is issued. So, force the mode change for SFI in Fixed PHY configuration to fix link issues. Fixes: e57f7a3feaef ("amd-xgbe: Prepare for working with more than one type of phy") Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 32d2c6fac652..4a2dc705b528 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1193,7 +1193,19 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata) if (pdata->phy.duplex != DUPLEX_FULL) return -EINVAL; - xgbe_set_mode(pdata, mode); + /* Force the mode change for SFI in Fixed PHY config. + * Fixed PHY configs needs PLL to be enabled while doing mode set. + * When the SFP module isn't connected during boot, driver assumes + * AN is ON and attempts autonegotiation. However, if the connected + * SFP comes up in Fixed PHY config, the link will not come up as + * PLL isn't enabled while the initial mode set command is issued. + * So, force the mode change for SFI in Fixed PHY configuration to + * fix link issues. + */ + if (mode == XGBE_MODE_SFI) + xgbe_change_mode(pdata, mode); + else + xgbe_set_mode(pdata, mode); return 0; } -- cgit v1.2.3 From 7121205d5330c6a3cb3379348886d47c77b78d06 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 22 Nov 2023 00:44:34 +0530 Subject: amd-xgbe: handle the corner-case during tx completion The existing implementation uses software logic to accumulate tx completions until the specified time (1ms) is met and then poll them. However, there exists a tiny gap which leads to a race between resetting and checking the tx_activate flag. Due to this the tx completions are not reported to upper layer and tx queue timeout kicks-in restarting the device. To address this, introduce a tx cleanup mechanism as part of the periodic maintenance process. Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver") Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 614c0278419b..6b73648b3779 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -682,10 +682,24 @@ static void xgbe_service(struct work_struct *work) static void xgbe_service_timer(struct timer_list *t) { struct xgbe_prv_data *pdata = from_timer(pdata, t, service_timer); + struct xgbe_channel *channel; + unsigned int i; queue_work(pdata->dev_workqueue, &pdata->service_work); mod_timer(&pdata->service_timer, jiffies + HZ); + + if (!pdata->tx_usecs) + return; + + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; + if (!channel->tx_ring || channel->tx_timer_active) + break; + channel->tx_timer_active = 1; + mod_timer(&channel->tx_timer, + jiffies + usecs_to_jiffies(pdata->tx_usecs)); + } } static void xgbe_init_timers(struct xgbe_prv_data *pdata) -- cgit v1.2.3 From 7a2323ac24a50311f64a3a9b54ed5bef5821ecae Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 22 Nov 2023 00:44:35 +0530 Subject: amd-xgbe: propagate the correct speed and duplex status xgbe_get_link_ksettings() does not propagate correct speed and duplex information to ethtool during cable unplug. Due to which ethtool reports incorrect values for speed and duplex. Address this by propagating correct information. Fixes: 7c12aa08779c ("amd-xgbe: Move the PHY support into amd-xgbe") Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 6e83ff59172a..32fab5e77246 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -314,10 +314,15 @@ static int xgbe_get_link_ksettings(struct net_device *netdev, cmd->base.phy_address = pdata->phy.address; - cmd->base.autoneg = pdata->phy.autoneg; - cmd->base.speed = pdata->phy.speed; - cmd->base.duplex = pdata->phy.duplex; + if (netif_carrier_ok(netdev)) { + cmd->base.speed = pdata->phy.speed; + cmd->base.duplex = pdata->phy.duplex; + } else { + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + } + cmd->base.autoneg = pdata->phy.autoneg; cmd->base.port = PORT_NONE; XGBE_LM_COPY(cmd, supported, lks, supported); -- cgit v1.2.3 From 0ffb08b1a45bd6b7694e01da0e1d9e3e788418fb Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 21 Nov 2023 13:12:55 -0800 Subject: ice: remove ptp_tx ring parameter flag Before performing a Tx timestamp in ice_stamp(), the driver checks a ptp_tx ring variable to see if timestamping is enabled on that ring. This value is set for all rings whenever userspace configures Tx timestamping. Ostensibly this was done to avoid wasting cycles checking other fields when timestamping has not been enabled. However, for Tx timestamps we already get an individual per-SKB flag indicating whether userspace wants to request a timestamp on that packet. We do not gain much by also having a separate flag to check for whether timestamping was enabled. In fact, the driver currently fails to restore the field after a PF reset. Because of this, if a PF reset occurs, timestamps will be disabled. Since this flag doesn't add value in the hotpath, remove it and always provide a timestamp if the SKB flag has been set. A following change will fix the reset path to properly restore user timestamping configuration completely. This went unnoticed for some time because one of the most common applications using Tx timestamps, ptp4l, will reconfigure the socket as part of its fault recovery logic. Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") Signed-off-by: Jacob Keller Reviewed-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_ptp.c | 14 -------------- drivers/net/ethernet/intel/ice/ice_txrx.c | 3 --- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 - 3 files changed, 18 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 1eddcbe89b0c..affd90622a68 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -280,20 +280,6 @@ static void ice_ptp_configure_tx_tstamp(struct ice_pf *pf, bool on) */ static void ice_set_tx_tstamp(struct ice_pf *pf, bool on) { - struct ice_vsi *vsi; - u16 i; - - vsi = ice_get_main_vsi(pf); - if (!vsi) - return; - - /* Set the timestamp enable flag for all the Tx rings */ - ice_for_each_txq(vsi, i) { - if (!vsi->tx_rings[i]) - continue; - vsi->tx_rings[i]->ptp_tx = on; - } - if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF) ice_ptp_configure_tx_tstamp(pf, on); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 52d0a126eb61..9e97ea863068 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -2306,9 +2306,6 @@ ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb, if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) return; - if (!tx_ring->ptp_tx) - return; - /* Tx timestamps cannot be sampled when doing TSO */ if (first->tx_flags & ICE_TX_FLAGS_TSO) return; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 166413fc33f4..daf7b9dbb143 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -380,7 +380,6 @@ struct ice_tx_ring { #define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) u8 flags; u8 dcb_tc; /* Traffic class of ring */ - u8 ptp_tx; } ____cacheline_internodealigned_in_smp; static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring) -- cgit v1.2.3 From 7d606a1e2d0575b6c3a2600f43f90d1e409f9661 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 21 Nov 2023 13:12:56 -0800 Subject: ice: unify logic for programming PFINT_TSYN_MSK Commit d938a8cca88a ("ice: Auxbus devices & driver for E822 TS") modified how Tx timestamps are handled for E822 devices. On these devices, only the clock owner handles reading the Tx timestamp data from firmware. To do this, the PFINT_TSYN_MSK register is modified from the default value to one which enables reacting to a Tx timestamp on all PHY ports. The driver currently programs PFINT_TSYN_MSK in different places depending on whether the port is the clock owner or not. For the clock owner, the PFINT_TSYN_MSK value is programmed during ice_ptp_init_owner just before calling ice_ptp_tx_ena_intr to program the PHY ports. For the non-clock owner ports, the PFINT_TSYN_MSK is programmed during ice_ptp_init_port. If a large enough device reset occurs, the PFINT_TSYN_MSK register will be reset to the default value in which only the PHY associated directly with the PF will cause the Tx timestamp interrupt to trigger. The driver lacks logic to reprogram the PFINT_TSYN_MSK register after a device reset. For the E822 device, this results in the PF no longer responding to interrupts for other ports. This results in failure to deliver Tx timestamps to user space applications. Rename ice_ptp_configure_tx_tstamp to ice_ptp_cfg_tx_interrupt, and unify the logic for programming PFINT_TSYN_MSK and PFINT_OICR_ENA into one place. This function will program both registers according to the combination of user configuration and device requirements. This ensures that PFINT_TSYN_MSK is always restored when we configure the Tx timestamp interrupt. Fixes: d938a8cca88a ("ice: Auxbus devices & driver for E822 TS") Signed-off-by: Jacob Keller Reviewed-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_ptp.c | 60 ++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 26 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index affd90622a68..624d05b4bbd9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -256,21 +256,42 @@ ice_verify_pin_e810t(struct ptp_clock_info *info, unsigned int pin, } /** - * ice_ptp_configure_tx_tstamp - Enable or disable Tx timestamp interrupt - * @pf: The PF pointer to search in - * @on: bool value for whether timestamp interrupt is enabled or disabled + * ice_ptp_cfg_tx_interrupt - Configure Tx timestamp interrupt for the device + * @pf: Board private structure + * + * Program the device to respond appropriately to the Tx timestamp interrupt + * cause. */ -static void ice_ptp_configure_tx_tstamp(struct ice_pf *pf, bool on) +static void ice_ptp_cfg_tx_interrupt(struct ice_pf *pf) { + struct ice_hw *hw = &pf->hw; + bool enable; u32 val; + switch (pf->ptp.tx_interrupt_mode) { + case ICE_PTP_TX_INTERRUPT_ALL: + /* React to interrupts across all quads. */ + wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x1f); + enable = true; + break; + case ICE_PTP_TX_INTERRUPT_NONE: + /* Do not react to interrupts on any quad. */ + wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x0); + enable = false; + break; + case ICE_PTP_TX_INTERRUPT_SELF: + default: + enable = pf->ptp.tstamp_config.tx_type == HWTSTAMP_TX_ON; + break; + } + /* Configure the Tx timestamp interrupt */ - val = rd32(&pf->hw, PFINT_OICR_ENA); - if (on) + val = rd32(hw, PFINT_OICR_ENA); + if (enable) val |= PFINT_OICR_TSYN_TX_M; else val &= ~PFINT_OICR_TSYN_TX_M; - wr32(&pf->hw, PFINT_OICR_ENA, val); + wr32(hw, PFINT_OICR_ENA, val); } /** @@ -280,10 +301,9 @@ static void ice_ptp_configure_tx_tstamp(struct ice_pf *pf, bool on) */ static void ice_set_tx_tstamp(struct ice_pf *pf, bool on) { - if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF) - ice_ptp_configure_tx_tstamp(pf, on); - pf->ptp.tstamp_config.tx_type = on ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + + ice_ptp_cfg_tx_interrupt(pf); } /** @@ -2789,15 +2809,7 @@ static int ice_ptp_init_owner(struct ice_pf *pf) /* Release the global hardware lock */ ice_ptp_unlock(hw); - if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_ALL) { - /* The clock owner for this device type handles the timestamp - * interrupt for all ports. - */ - ice_ptp_configure_tx_tstamp(pf, true); - - /* React on all quads interrupts for E82x */ - wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x1f); - + if (!ice_is_e810(hw)) { /* Enable quad interrupts */ err = ice_ptp_tx_ena_intr(pf, true, itr); if (err) @@ -2867,13 +2879,6 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) case ICE_PHY_E810: return ice_ptp_init_tx_e810(pf, &ptp_port->tx); case ICE_PHY_E822: - /* Non-owner PFs don't react to any interrupts on E82x, - * neither on own quad nor on others - */ - if (!ice_ptp_pf_handles_tx_interrupt(pf)) { - ice_ptp_configure_tx_tstamp(pf, false); - wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x0); - } kthread_init_delayed_work(&ptp_port->ov_work, ice_ptp_wait_for_offsets); @@ -3018,6 +3023,9 @@ void ice_ptp_init(struct ice_pf *pf) /* Start the PHY timestamping block */ ice_ptp_reset_phy_timestamping(pf); + /* Configure initial Tx interrupt settings */ + ice_ptp_cfg_tx_interrupt(pf); + set_bit(ICE_FLAG_PTP, pf->flags); err = ice_ptp_init_work(pf, ptp); if (err) -- cgit v1.2.3 From 7758017911a4f2578d54c318e8fe77bcb5899054 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 21 Nov 2023 13:12:57 -0800 Subject: ice: restore timestamp configuration after device reset The driver calls ice_ptp_cfg_timestamp() during ice_ptp_prepare_for_reset() to disable timestamping while the device is resetting. This operation destroys the user requested configuration. While the driver does call ice_ptp_cfg_timestamp in ice_rebuild() to restore some hardware settings after a reset, it unconditionally passes true or false, resulting in failure to restore previous user space configuration. This results in a device reset forcibly disabling timestamp configuration regardless of current user settings. This was not detected previously due to a quirk of the LinuxPTP ptp4l application. If ptp4l detects a missing timestamp, it enters a fault state and performs recovery logic which includes executing SIOCSHWTSTAMP again, restoring the now accidentally cleared configuration. Not every application does this, and for these applications, timestamps will mysteriously stop after a PF reset, without being restored until an application restart. Fix this by replacing ice_ptp_cfg_timestamp() with two new functions: 1) ice_ptp_disable_timestamp_mode() which unconditionally disables the timestamping logic in ice_ptp_prepare_for_reset() and ice_ptp_release() 2) ice_ptp_restore_timestamp_mode() which calls ice_ptp_restore_tx_interrupt() to restore Tx timestamping configuration, calls ice_set_rx_tstamp() to restore Rx timestamping configuration, and issues an immediate TSYN_TX interrupt to ensure that timestamps which may have occurred during the device reset get processed. Modify the ice_ptp_set_timestamp_mode to directly save the user configuration and then call ice_ptp_restore_timestamp_mode. This way, reset no longer destroys the saved user configuration. This obsoletes the ice_set_tx_tstamp() function which can now be safely removed. With this change, all devices should now restore Tx and Rx timestamping functionality correctly after a PF reset without application intervention. Fixes: 77a781155a65 ("ice: enable receive hardware timestamping") Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") Signed-off-by: Jacob Keller Reviewed-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_main.c | 12 ++--- drivers/net/ethernet/intel/ice/ice_ptp.c | 74 +++++++++++++++++++------------ drivers/net/ethernet/intel/ice/ice_ptp.h | 5 +-- 3 files changed, 51 insertions(+), 40 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6607fa6fe556..fb9c93f37e84 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7401,15 +7401,6 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } - /* configure PTP timestamping after VSI rebuild */ - if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) { - if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF) - ice_ptp_cfg_timestamp(pf, false); - else if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_ALL) - /* for E82x PHC owner always need to have interrupts */ - ice_ptp_cfg_timestamp(pf, true); - } - err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL); if (err) { dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err); @@ -7461,6 +7452,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_plug_aux_dev(pf); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) ice_lag_rebuild(pf); + + /* Restore timestamp mode settings after VSI rebuild */ + ice_ptp_restore_timestamp_mode(pf); return; err_vsi_rebuild: diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 624d05b4bbd9..71f405f8a6fe 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -294,18 +294,6 @@ static void ice_ptp_cfg_tx_interrupt(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, val); } -/** - * ice_set_tx_tstamp - Enable or disable Tx timestamping - * @pf: The PF pointer to search in - * @on: bool value for whether timestamps are enabled or disabled - */ -static void ice_set_tx_tstamp(struct ice_pf *pf, bool on) -{ - pf->ptp.tstamp_config.tx_type = on ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - - ice_ptp_cfg_tx_interrupt(pf); -} - /** * ice_set_rx_tstamp - Enable or disable Rx timestamping * @pf: The PF pointer to search in @@ -317,7 +305,7 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on) u16 i; vsi = ice_get_main_vsi(pf); - if (!vsi) + if (!vsi || !vsi->rx_rings) return; /* Set the timestamp flag for all the Rx rings */ @@ -326,23 +314,50 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on) continue; vsi->rx_rings[i]->ptp_rx = on; } +} + +/** + * ice_ptp_disable_timestamp_mode - Disable current timestamp mode + * @pf: Board private structure + * + * Called during preparation for reset to temporarily disable timestamping on + * the device. Called during remove to disable timestamping while cleaning up + * driver resources. + */ +static void ice_ptp_disable_timestamp_mode(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + u32 val; + + val = rd32(hw, PFINT_OICR_ENA); + val &= ~PFINT_OICR_TSYN_TX_M; + wr32(hw, PFINT_OICR_ENA, val); - pf->ptp.tstamp_config.rx_filter = on ? HWTSTAMP_FILTER_ALL : - HWTSTAMP_FILTER_NONE; + ice_set_rx_tstamp(pf, false); } /** - * ice_ptp_cfg_timestamp - Configure timestamp for init/deinit + * ice_ptp_restore_timestamp_mode - Restore timestamp configuration * @pf: Board private structure - * @ena: bool value to enable or disable time stamp * - * This function will configure timestamping during PTP initialization - * and deinitialization + * Called at the end of rebuild to restore timestamp configuration after + * a device reset. */ -void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) +void ice_ptp_restore_timestamp_mode(struct ice_pf *pf) { - ice_set_tx_tstamp(pf, ena); - ice_set_rx_tstamp(pf, ena); + struct ice_hw *hw = &pf->hw; + bool enable_rx; + + ice_ptp_cfg_tx_interrupt(pf); + + enable_rx = pf->ptp.tstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; + ice_set_rx_tstamp(pf, enable_rx); + + /* Trigger an immediate software interrupt to ensure that timestamps + * which occurred during reset are handled now. + */ + wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M); + ice_flush(hw); } /** @@ -2043,10 +2058,10 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) { switch (config->tx_type) { case HWTSTAMP_TX_OFF: - ice_set_tx_tstamp(pf, false); + pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_OFF; break; case HWTSTAMP_TX_ON: - ice_set_tx_tstamp(pf, true); + pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_ON; break; default: return -ERANGE; @@ -2054,7 +2069,7 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: - ice_set_rx_tstamp(pf, false); + pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: @@ -2070,12 +2085,15 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: case HWTSTAMP_FILTER_ALL: - ice_set_rx_tstamp(pf, true); + pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: return -ERANGE; } + /* Immediately update the device timestamping mode */ + ice_ptp_restore_timestamp_mode(pf); + return 0; } @@ -2743,7 +2761,7 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf) clear_bit(ICE_FLAG_PTP, pf->flags); /* Disable timestamping for both Tx and Rx */ - ice_ptp_cfg_timestamp(pf, false); + ice_ptp_disable_timestamp_mode(pf); kthread_cancel_delayed_work_sync(&ptp->work); @@ -3061,7 +3079,7 @@ void ice_ptp_release(struct ice_pf *pf) return; /* Disable timestamping for both Tx and Rx */ - ice_ptp_cfg_timestamp(pf, false); + ice_ptp_disable_timestamp_mode(pf); ice_ptp_remove_auxbus_device(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 8f6f94392756..06a330867fc9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -292,7 +292,7 @@ int ice_ptp_clock_index(struct ice_pf *pf); struct ice_pf; int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr); int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr); -void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena); +void ice_ptp_restore_timestamp_mode(struct ice_pf *pf); void ice_ptp_extts_event(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); @@ -317,8 +317,7 @@ static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr) return -EOPNOTSUPP; } -static inline void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) { } - +static inline void ice_ptp_restore_timestamp_mode(struct ice_pf *pf) { } static inline void ice_ptp_extts_event(struct ice_pf *pf) { } static inline s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) -- cgit v1.2.3 From 4e20655e503e3a478cd1682bf25e3202dd823da8 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 21 Nov 2023 13:13:36 -0800 Subject: i40e: Fix adding unsupported cloud filters If a VF tries to add unsupported cloud filter through virtchnl then i40e_add_del_cloud_filter(_big_buf) returns -ENOTSUPP but this error code is stored in 'ret' instead of 'aq_ret' that is used as error code sent back to VF. In this scenario where one of the mentioned functions fails the value of 'aq_ret' is zero so the VF will incorrectly receive a 'success'. Use 'aq_ret' to store return value and remove 'ret' local variable. Additionally fix the issue when filter allocation fails, in this case no notification is sent back to the VF. Fixes: e284fc280473 ("i40e: Add and delete cloud filter") Reviewed-by: Simon Horman Signed-off-by: Ivan Vecera Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20231121211338.3348677-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 08d7edccfb8d..3f99eb198245 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3844,7 +3844,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; int aq_ret = 0; - int i, ret; + int i; if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = -EINVAL; @@ -3868,8 +3868,10 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) } cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL); - if (!cfilter) - return -ENOMEM; + if (!cfilter) { + aq_ret = -ENOMEM; + goto err_out; + } /* parse destination mac address */ for (i = 0; i < ETH_ALEN; i++) @@ -3917,13 +3919,13 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) /* Adding cloud filter programmed as TC filter */ if (tcf.dst_port) - ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); + aq_ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); else - ret = i40e_add_del_cloud_filter(vsi, cfilter, true); - if (ret) { + aq_ret = i40e_add_del_cloud_filter(vsi, cfilter, true); + if (aq_ret) { dev_err(&pf->pdev->dev, "VF %d: Failed to add cloud filter, err %pe aq_err %s\n", - vf->vf_id, ERR_PTR(ret), + vf->vf_id, ERR_PTR(aq_ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); goto err_free; } -- cgit v1.2.3 From fd0413bbf8b11f56e8aa842783b0deda0dfe2926 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 21 Nov 2023 16:42:17 -0800 Subject: net: axienet: Fix check for partial TX checksum Due to a typo, the code checked the RX checksum feature in the TX path. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Samuel Holland Reviewed-by: Andrew Lunn Reviewed-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20231122004219.3504219-1-samuel.holland@sifive.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 82d0d44b2b02..bf6e33990490 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -822,7 +822,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (lp->features & XAE_FEATURE_FULL_TX_CSUM) { /* Tx Full Checksum Offload Enabled */ cur_p->app0 |= 2; - } else if (lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) { + } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) { csum_start_off = skb_transport_offset(skb); csum_index_off = csum_start_off + skb->csum_offset; /* Tx Partial Checksum Offload Enabled */ -- cgit v1.2.3 From 37f0205538baf70beb57cdcb6c7d14aa13257926 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 22 Nov 2023 17:17:08 -0600 Subject: net: ipa: fix one GSI register field width The width of the R_LENGTH field of the EV_CH_E_CNTXT_1 GSI register is 24 bits (not 20 bits) starting with IPA v5.0. Fix this. Fixes: faf0678ec8a0 ("net: ipa: add IPA v5.0 GSI register definitions") Signed-off-by: Alex Elder Link: https://lore.kernel.org/r/20231122231708.896632-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/reg/gsi_reg-v5.0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ipa/reg/gsi_reg-v5.0.c b/drivers/net/ipa/reg/gsi_reg-v5.0.c index d7b81a36d673..145eb0bd096d 100644 --- a/drivers/net/ipa/reg/gsi_reg-v5.0.c +++ b/drivers/net/ipa/reg/gsi_reg-v5.0.c @@ -78,7 +78,7 @@ REG_STRIDE_FIELDS(EV_CH_E_CNTXT_0, ev_ch_e_cntxt_0, 0x0001c000 + 0x12000 * GSI_EE_AP, 0x80); static const u32 reg_ev_ch_e_cntxt_1_fmask[] = { - [R_LENGTH] = GENMASK(19, 0), + [R_LENGTH] = GENMASK(23, 0), }; REG_STRIDE_FIELDS(EV_CH_E_CNTXT_1, ev_ch_e_cntxt_1, -- cgit v1.2.3