summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/igc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 02:43:10 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 02:43:10 +0300
commit3a8a670eeeaa40d87bd38a587438952741980c18 (patch)
treed5546d311271503eadf75b45d87e12720e72899f /drivers/net/ethernet/intel/igc
parent6a8cbd9253abc1bd0df4d60c4c24fa555190376d (diff)
parentae230642190a51b85656d6da2df744d534d59544 (diff)
downloadlinux-3a8a670eeeaa40d87bd38a587438952741980c18.tar.xz
Merge tag 'net-next-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking changes from Jakub Kicinski: "WiFi 7 and sendpage changes are the biggest pieces of work for this release. The latter will definitely require fixes but I think that we got it to a reasonable point. Core: - Rework the sendpage & splice implementations Instead of feeding data into sockets page by page extend sendmsg handlers to support taking a reference on the data, controlled by a new flag called MSG_SPLICE_PAGES Rework the handling of unexpected-end-of-file to invoke an additional callback instead of trying to predict what the right combination of MORE/NOTLAST flags is Remove the MSG_SENDPAGE_NOTLAST flag completely - Implement SCM_PIDFD, a new type of CMSG type analogous to SCM_CREDENTIALS, but it contains pidfd instead of plain pid - Enable socket busy polling with CONFIG_RT - Improve reliability and efficiency of reporting for ref_tracker - Auto-generate a user space C library for various Netlink families Protocols: - Allow TCP to shrink the advertised window when necessary, prevent sk_rcvbuf auto-tuning from growing the window all the way up to tcp_rmem[2] - Use per-VMA locking for "page-flipping" TCP receive zerocopy - Prepare TCP for device-to-device data transfers, by making sure that payloads are always attached to skbs as page frags - Make the backoff time for the first N TCP SYN retransmissions linear. Exponential backoff is unnecessarily conservative - Create a new MPTCP getsockopt to retrieve all info (MPTCP_FULL_INFO) - Avoid waking up applications using TLS sockets until we have a full record - Allow using kernel memory for protocol ioctl callbacks, paving the way to issuing ioctls over io_uring - Add nolocalbypass option to VxLAN, forcing packets to be fully encapsulated even if they are destined for a local IP address - Make TCPv4 use consistent hash in TIME_WAIT and SYN_RECV. Ensure in-kernel ECMP implementation (e.g. Open vSwitch) select the same link for all packets. Support L4 symmetric hashing in Open vSwitch - PPPoE: make number of hash bits configurable - Allow DNS to be overwritten by DHCPACK in the in-kernel DHCP client (ipconfig) - Add layer 2 miss indication and filtering, allowing higher layers (e.g. ACL filters) to make forwarding decisions based on whether packet matched forwarding state in lower devices (bridge) - Support matching on Connectivity Fault Management (CFM) packets - Hide the "link becomes ready" IPv6 messages by demoting their printk level to debug - HSR: don't enable promiscuous mode if device offloads the proto - Support active scanning in IEEE 802.15.4 - Continue work on Multi-Link Operation for WiFi 7 BPF: - Add precision propagation for subprogs and callbacks. This allows maintaining verification efficiency when subprograms are used, or in fact passing the verifier at all for complex programs, especially those using open-coded iterators - Improve BPF's {g,s}setsockopt() length handling. Previously BPF assumed the length is always equal to the amount of written data. But some protos allow passing a NULL buffer to discover what the output buffer *should* be, without writing anything - Accept dynptr memory as memory arguments passed to helpers - Add routing table ID to bpf_fib_lookup BPF helper - Support O_PATH FDs in BPF_OBJ_PIN and BPF_OBJ_GET commands - Drop bpf_capable() check in BPF_MAP_FREEZE command (used to mark maps as read-only) - Show target_{obj,btf}_id in tracing link fdinfo - Addition of several new kfuncs (most of the names are self-explanatory): - Add a set of new dynptr kfuncs: bpf_dynptr_adjust(), bpf_dynptr_is_null(), bpf_dynptr_is_rdonly(), bpf_dynptr_size() and bpf_dynptr_clone(). - bpf_task_under_cgroup() - bpf_sock_destroy() - force closing sockets - bpf_cpumask_first_and(), rework bpf_cpumask_any*() kfuncs Netfilter: - Relax set/map validation checks in nf_tables. Allow checking presence of an entry in a map without using the value - Increase ip_vs_conn_tab_bits range for 64BIT builds - Allow updating size of a set - Improve NAT tuple selection when connection is closing Driver API: - Integrate netdev with LED subsystem, to allow configuring HW "offloaded" blinking of LEDs based on link state and activity (i.e. packets coming in and out) - Support configuring rate selection pins of SFP modules - Factor Clause 73 auto-negotiation code out of the drivers, provide common helper routines - Add more fool-proof helpers for managing lifetime of MDIO devices associated with the PCS layer - Allow drivers to report advanced statistics related to Time Aware scheduler offload (taprio) - Allow opting out of VF statistics in link dump, to allow more VFs to fit into the message - Split devlink instance and devlink port operations New hardware / drivers: - Ethernet: - Synopsys EMAC4 IP support (stmmac) - Marvell 88E6361 8 port (5x1GE + 3x2.5GE) switches - Marvell 88E6250 7 port switches - Microchip LAN8650/1 Rev.B0 PHYs - MediaTek MT7981/MT7988 built-in 1GE PHY driver - WiFi: - Realtek RTL8192FU, 2.4 GHz, b/g/n mode, 2T2R, 300 Mbps - Realtek RTL8723DS (SDIO variant) - Realtek RTL8851BE - CAN: - Fintek F81604 Drivers: - Ethernet NICs: - Intel (100G, ice): - support dynamic interrupt allocation - use meta data match instead of VF MAC addr on slow-path - nVidia/Mellanox: - extend link aggregation to handle 4, rather than just 2 ports - spawn sub-functions without any features by default - OcteonTX2: - support HTB (Tx scheduling/QoS) offload - make RSS hash generation configurable - support selecting Rx queue using TC filters - Wangxun (ngbe/txgbe): - add basic Tx/Rx packet offloads - add phylink support (SFP/PCS control) - Freescale/NXP (enetc): - report TAPRIO packet statistics - Solarflare/AMD: - support matching on IP ToS and UDP source port of outer header - VxLAN and GENEVE tunnel encapsulation over IPv4 or IPv6 - add devlink dev info support for EF10 - Virtual NICs: - Microsoft vNIC: - size the Rx indirection table based on requested configuration - support VLAN tagging - Amazon vNIC: - try to reuse Rx buffers if not fully consumed, useful for ARM servers running with 16kB pages - Google vNIC: - support TCP segmentation of >64kB frames - Ethernet embedded switches: - Marvell (mv88e6xxx): - enable USXGMII (88E6191X) - Microchip: - lan966x: add support for Egress Stage 0 ACL engine - lan966x: support mapping packet priority to internal switch priority (based on PCP or DSCP) - Ethernet PHYs: - Broadcom PHYs: - support for Wake-on-LAN for BCM54210E/B50212E - report LPI counter - Microsemi PHYs: support RGMII delay configuration (VSC85xx) - Micrel PHYs: receive timestamp in the frame (LAN8841) - Realtek PHYs: support optional external PHY clock - Altera TSE PCS: merge the driver into Lynx PCS which it is a variant of - CAN: Kvaser PCIEcan: - support packet timestamping - WiFi: - Intel (iwlwifi): - major update for new firmware and Multi-Link Operation (MLO) - configuration rework to drop test devices and split the different families - support for segmented PNVM images and power tables - new vendor entries for PPAG (platform antenna gain) feature - Qualcomm 802.11ax (ath11k): - Multiple Basic Service Set Identifier (MBSSID) and Enhanced MBSSID Advertisement (EMA) support in AP mode - support factory test mode - RealTek (rtw89): - add RSSI based antenna diversity - support U-NII-4 channels on 5 GHz band - RealTek (rtl8xxxu): - AP mode support for 8188f - support USB RX aggregation for the newer chips" * tag 'net-next-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1602 commits) net: scm: introduce and use scm_recv_unix helper af_unix: Skip SCM_PIDFD if scm->pid is NULL. net: lan743x: Simplify comparison netlink: Add __sock_i_ino() for __netlink_diag_dump(). net: dsa: avoid suspicious RCU usage for synced VLAN-aware MAC addresses Revert "af_unix: Call scm_recv() only after scm_set_cred()." phylink: ReST-ify the phylink_pcs_neg_mode() kdoc libceph: Partially revert changes to support MSG_SPLICE_PAGES net: phy: mscc: fix packet loss due to RGMII delays net: mana: use vmalloc_array and vcalloc net: enetc: use vmalloc_array and vcalloc ionic: use vmalloc_array and vcalloc pds_core: use vmalloc_array and vcalloc gve: use vmalloc_array and vcalloc octeon_ep: use vmalloc_array and vcalloc net: usb: qmi_wwan: add u-blox 0x1312 composition perf trace: fix MSG_SPLICE_PAGES build error ipvlan: Fix return value of ipvlan_queue_xmit() netfilter: nf_tables: fix underflow in chain reference counter netfilter: nf_tables: unbind non-anonymous set if rule construction fails ...
Diffstat (limited to 'drivers/net/ethernet/intel/igc')
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h43
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c151
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ptp.c142
3 files changed, 276 insertions, 60 deletions
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 34aebf00a512..00a5ee487812 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -13,6 +13,7 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/timecounter.h>
#include <linux/net_tstamp.h>
+#include <linux/bitfield.h>
#include "igc_hw.h"
@@ -228,7 +229,10 @@ struct igc_adapter {
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_caps;
- struct work_struct ptp_tx_work;
+ /* Access to ptp_tx_skb and ptp_tx_start are protected by the
+ * ptp_tx_lock.
+ */
+ spinlock_t ptp_tx_lock;
struct sk_buff *ptp_tx_skb;
struct hwtstamp_config tstamp_config;
unsigned long ptp_tx_start;
@@ -311,6 +315,33 @@ extern char igc_driver_name[];
#define IGC_MRQC_RSS_FIELD_IPV4_UDP 0x00400000
#define IGC_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
+/* RX-desc Write-Back format RSS Type's */
+enum igc_rss_type_num {
+ IGC_RSS_TYPE_NO_HASH = 0,
+ IGC_RSS_TYPE_HASH_TCP_IPV4 = 1,
+ IGC_RSS_TYPE_HASH_IPV4 = 2,
+ IGC_RSS_TYPE_HASH_TCP_IPV6 = 3,
+ IGC_RSS_TYPE_HASH_IPV6_EX = 4,
+ IGC_RSS_TYPE_HASH_IPV6 = 5,
+ IGC_RSS_TYPE_HASH_TCP_IPV6_EX = 6,
+ IGC_RSS_TYPE_HASH_UDP_IPV4 = 7,
+ IGC_RSS_TYPE_HASH_UDP_IPV6 = 8,
+ IGC_RSS_TYPE_HASH_UDP_IPV6_EX = 9,
+ IGC_RSS_TYPE_MAX = 10,
+};
+#define IGC_RSS_TYPE_MAX_TABLE 16
+#define IGC_RSS_TYPE_MASK GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */
+
+/* igc_rss_type - Rx descriptor RSS type field */
+static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
+{
+ /* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved)
+ * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info)
+ * is slightly slower than via u32 (wb.lower.lo_dword.data)
+ */
+ return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK);
+}
+
/* Interrupt defines */
#define IGC_START_ITR 648 /* ~6000 ints/sec */
#define IGC_4K_ITR 980
@@ -401,7 +432,6 @@ enum igc_state_t {
__IGC_TESTING,
__IGC_RESETTING,
__IGC_DOWN,
- __IGC_PTP_TX_IN_PROGRESS,
};
enum igc_tx_flags {
@@ -471,6 +501,13 @@ struct igc_rx_buffer {
};
};
+/* context wrapper around xdp_buff to provide access to descriptor metadata */
+struct igc_xdp_buff {
+ struct xdp_buff xdp;
+ union igc_adv_rx_desc *rx_desc;
+ ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */
+};
+
struct igc_q_vector {
struct igc_adapter *adapter; /* backlink */
void __iomem *itr_register;
@@ -578,6 +615,7 @@ enum igc_ring_flags_t {
IGC_RING_FLAG_TX_CTX_IDX,
IGC_RING_FLAG_TX_DETECT_HANG,
IGC_RING_FLAG_AF_XDP_ZC,
+ IGC_RING_FLAG_TX_HWTSTAMP,
};
#define ring_uses_large_buffer(ring) \
@@ -634,6 +672,7 @@ int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
void igc_ptp_tx_hang(struct igc_adapter *adapter);
void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts);
+void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);
#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index fa764190f270..019ce91c45aa 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1585,14 +1585,16 @@ done:
}
}
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
+ skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
/* FIXME: add support for retrieving timestamps from
* the other timer registers before skipping the
* timestamping request.
*/
- if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
- !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
- &adapter->state)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
+ if (!adapter->ptp_tx_skb) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
tx_flags |= IGC_TX_FLAGS_TSTAMP;
@@ -1601,6 +1603,8 @@ done:
} else {
adapter->tx_hwtstamp_skipped++;
}
+
+ spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
}
if (skb_vlan_tag_present(skb)) {
@@ -1697,14 +1701,36 @@ static void igc_rx_checksum(struct igc_ring *ring,
le32_to_cpu(rx_desc->wb.upper.status_error));
}
+/* Mapping HW RSS Type to enum pkt_hash_types */
+static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
+ [IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2,
+ [IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3,
+ [IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3,
+ [IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3,
+ [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4,
+ [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
+ [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */
+ [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */
+ [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons */
+ [13] = PKT_HASH_TYPE_NONE,
+ [14] = PKT_HASH_TYPE_NONE,
+ [15] = PKT_HASH_TYPE_NONE,
+};
+
static inline void igc_rx_hash(struct igc_ring *ring,
union igc_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
- if (ring->netdev->features & NETIF_F_RXHASH)
- skb_set_hash(skb,
- le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
- PKT_HASH_TYPE_L3);
+ if (ring->netdev->features & NETIF_F_RXHASH) {
+ u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+ u32 rss_type = igc_rss_type(rx_desc);
+
+ skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
+ }
}
static void igc_rx_vlan(struct igc_ring *rx_ring,
@@ -2221,6 +2247,8 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
if (!count)
return ok;
+ XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff);
+
desc = IGC_RX_DESC(ring, i);
bi = &ring->rx_buffer_info[i];
i -= ring->count;
@@ -2394,6 +2422,8 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
nq = txring_txq(ring);
__netif_tx_lock(nq, cpu);
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ txq_trans_cond_update(nq);
res = igc_xdp_init_tx_descriptor(ring, xdpf);
__netif_tx_unlock(nq);
return res;
@@ -2505,8 +2535,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
union igc_adv_rx_desc *rx_desc;
struct igc_rx_buffer *rx_buffer;
unsigned int size, truesize;
+ struct igc_xdp_buff ctx;
ktime_t timestamp = 0;
- struct xdp_buff xdp;
int pkt_offset = 0;
void *pktbuf;
@@ -2535,18 +2565,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
pktbuf);
+ ctx.rx_ts = timestamp;
pkt_offset = IGC_TS_HDR_LEN;
size -= IGC_TS_HDR_LEN;
}
if (!skb) {
- xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
- xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
+ xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq);
+ xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring),
igc_rx_offset(rx_ring) + pkt_offset,
size, true);
- xdp_buff_clear_frags_flag(&xdp);
+ xdp_buff_clear_frags_flag(&ctx.xdp);
+ ctx.rx_desc = rx_desc;
- skb = igc_xdp_run_prog(adapter, &xdp);
+ skb = igc_xdp_run_prog(adapter, &ctx.xdp);
}
if (IS_ERR(skb)) {
@@ -2568,9 +2600,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
} else if (skb)
igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring))
- skb = igc_build_skb(rx_ring, rx_buffer, &xdp);
+ skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp);
else
- skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
+ skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp,
timestamp);
/* exit if we failed to retrieve a buffer */
@@ -2671,6 +2703,15 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
napi_gro_receive(&q_vector->napi, skb);
}
+static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp)
+{
+ /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. The
+ * igc_xdp_buff shares its layout with xdp_buff_xsk and private
+ * igc_xdp_buff fields fall into xdp_buff_xsk->cb
+ */
+ return (struct igc_xdp_buff *)xdp;
+}
+
static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
{
struct igc_adapter *adapter = q_vector->adapter;
@@ -2689,6 +2730,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
while (likely(total_packets < budget)) {
union igc_adv_rx_desc *desc;
struct igc_rx_buffer *bi;
+ struct igc_xdp_buff *ctx;
ktime_t timestamp = 0;
unsigned int size;
int res;
@@ -2706,9 +2748,13 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
bi = &ring->rx_buffer_info[ntc];
+ ctx = xsk_buff_to_igc_ctx(bi->xdp);
+ ctx->rx_desc = desc;
+
if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
bi->xdp->data);
+ ctx->rx_ts = timestamp;
bi->xdp->data += IGC_TS_HDR_LEN;
@@ -2796,6 +2842,9 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
__netif_tx_lock(nq, cpu);
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ txq_trans_cond_update(nq);
+
budget = igc_desc_unused(ring);
while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
@@ -5219,7 +5268,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter)
if (tsicr & IGC_TSICR_TXTS) {
/* retrieve hardware timestamp */
- schedule_work(&adapter->ptp_tx_work);
+ igc_ptp_tx_tstamp_event(adapter);
ack |= IGC_TSICR_TXTS;
}
@@ -6075,9 +6124,18 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
size_t n;
int i;
- adapter->qbv_enable = qopt->enable;
+ switch (qopt->cmd) {
+ case TAPRIO_CMD_REPLACE:
+ adapter->qbv_enable = true;
+ break;
+ case TAPRIO_CMD_DESTROY:
+ adapter->qbv_enable = false;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
- if (!qopt->enable)
+ if (!adapter->qbv_enable)
return igc_tsn_clear_schedule(adapter);
if (qopt->base_time < 0)
@@ -6321,6 +6379,9 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
__netif_tx_lock(nq, cpu);
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ txq_trans_cond_update(nq);
+
drops = 0;
for (i = 0; i < num_frames; i++) {
int err;
@@ -6461,6 +6522,58 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
return value;
}
+/* Mapping HW RSS Type to enum xdp_rss_hash_type */
+static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = {
+ [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2,
+ [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP,
+ [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4,
+ [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP,
+ [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
+ [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6,
+ [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
+ [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP,
+ [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP,
+ [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX,
+ [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */
+ [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */
+ [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */
+ [13] = XDP_RSS_TYPE_NONE,
+ [14] = XDP_RSS_TYPE_NONE,
+ [15] = XDP_RSS_TYPE_NONE,
+};
+
+static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type)
+{
+ const struct igc_xdp_buff *ctx = (void *)_ctx;
+
+ if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))
+ return -ENODATA;
+
+ *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss);
+ *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)];
+
+ return 0;
+}
+
+static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp)
+{
+ const struct igc_xdp_buff *ctx = (void *)_ctx;
+
+ if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) {
+ *timestamp = ctx->rx_ts;
+
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+static const struct xdp_metadata_ops igc_xdp_metadata_ops = {
+ .xmo_rx_hash = igc_xdp_rx_hash,
+ .xmo_rx_timestamp = igc_xdp_rx_timestamp,
+};
+
/**
* igc_probe - Device Initialization Routine
* @pdev: PCI device information struct
@@ -6534,6 +6647,7 @@ static int igc_probe(struct pci_dev *pdev,
hw->hw_addr = adapter->io_addr;
netdev->netdev_ops = &igc_netdev_ops;
+ netdev->xdp_metadata_ops = &igc_xdp_metadata_ops;
igc_ethtool_set_ops(netdev);
netdev->watchdog_timeo = 5 * HZ;
@@ -6561,6 +6675,7 @@ static int igc_probe(struct pci_dev *pdev,
netdev->features |= NETIF_F_TSO;
netdev->features |= NETIF_F_TSO6;
netdev->features |= NETIF_F_TSO_ECN;
+ netdev->features |= NETIF_F_RXHASH;
netdev->features |= NETIF_F_RXCSUM;
netdev->features |= NETIF_F_HW_CSUM;
netdev->features |= NETIF_F_SCTP_CRC;
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 4e10ced736db..32ef112f8291 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -536,9 +536,34 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter)
wr32(IGC_TSYNCRXCTL, val);
}
+static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
+
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+
+ spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
+}
+
static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
+ int i;
+
+ /* Clear the flags first to avoid new packets to be enqueued
+ * for TX timestamping.
+ */
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+ clear_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags);
+ }
+
+ /* Now we can clean the pending TX timestamp requests. */
+ igc_ptp_clear_tx_tstamp(adapter);
wr32(IGC_TSYNCTXCTL, 0);
}
@@ -546,12 +571,23 @@ static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter)
static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
+ int i;
wr32(IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG);
/* Read TXSTMP registers to discard any timestamp previously stored. */
rd32(IGC_TXSTMPL);
rd32(IGC_TXSTMPH);
+
+ /* The hardware is ready to accept TX timestamp requests,
+ * notify the transmit path.
+ */
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+ set_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags);
+ }
+
}
/**
@@ -603,6 +639,7 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
return 0;
}
+/* Requires adapter->ptp_tx_lock held by caller. */
static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
@@ -610,7 +647,6 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
dev_kfree_skb_any(adapter->ptp_tx_skb);
adapter->ptp_tx_skb = NULL;
adapter->tx_hwtstamp_timeouts++;
- clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
/* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */
rd32(IGC_TXSTMPH);
netdev_warn(adapter->netdev, "Tx timestamp timeout\n");
@@ -618,20 +654,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
void igc_ptp_tx_hang(struct igc_adapter *adapter)
{
- bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
- IGC_PTP_TX_TIMEOUT);
+ unsigned long flags;
- if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
- return;
+ spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
- /* If we haven't received a timestamp within the timeout, it is
- * reasonable to assume that it will never occur, so we can unlock the
- * timestamp bit when this occurs.
- */
- if (timeout) {
- cancel_work_sync(&adapter->ptp_tx_work);
- igc_ptp_tx_timeout(adapter);
- }
+ if (!adapter->ptp_tx_skb)
+ goto unlock;
+
+ if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT))
+ goto unlock;
+
+ igc_ptp_tx_timeout(adapter);
+
+unlock:
+ spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
}
/**
@@ -641,20 +677,57 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter)
* If we were asked to do hardware stamping and such a time stamp is
* available, then it must have been for this skb here because we only
* allow only one such packet into the queue.
+ *
+ * Context: Expects adapter->ptp_tx_lock to be held by caller.
*/
static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
{
struct sk_buff *skb = adapter->ptp_tx_skb;
struct skb_shared_hwtstamps shhwtstamps;
struct igc_hw *hw = &adapter->hw;
+ u32 tsynctxctl;
int adjust = 0;
u64 regval;
if (WARN_ON_ONCE(!skb))
return;
- regval = rd32(IGC_TXSTMPL);
- regval |= (u64)rd32(IGC_TXSTMPH) << 32;
+ tsynctxctl = rd32(IGC_TSYNCTXCTL);
+ tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0;
+ if (tsynctxctl) {
+ regval = rd32(IGC_TXSTMPL);
+ regval |= (u64)rd32(IGC_TXSTMPH) << 32;
+ } else {
+ /* There's a bug in the hardware that could cause
+ * missing interrupts for TX timestamping. The issue
+ * is that for new interrupts to be triggered, the
+ * IGC_TXSTMPH_0 register must be read.
+ *
+ * To avoid discarding a valid timestamp that just
+ * happened at the "wrong" time, we need to confirm
+ * that there was no timestamp captured, we do that by
+ * assuming that no two timestamps in sequence have
+ * the same nanosecond value.
+ *
+ * So, we read the "low" register, read the "high"
+ * register (to latch a new timestamp) and read the
+ * "low" register again, if "old" and "new" versions
+ * of the "low" register are different, a valid
+ * timestamp was captured, we can read the "high"
+ * register again.
+ */
+ u32 txstmpl_old, txstmpl_new;
+
+ txstmpl_old = rd32(IGC_TXSTMPL);
+ rd32(IGC_TXSTMPH);
+ txstmpl_new = rd32(IGC_TXSTMPL);
+
+ if (txstmpl_old == txstmpl_new)
+ return;
+
+ regval = txstmpl_new;
+ regval |= (u64)rd32(IGC_TXSTMPH) << 32;
+ }
if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval))
return;
@@ -676,13 +749,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
shhwtstamps.hwtstamp =
ktime_add_ns(shhwtstamps.hwtstamp, adjust);
- /* Clear the lock early before calling skb_tstamp_tx so that
- * applications are not woken up before the lock bit is clear. We use
- * a copy of the skb pointer to ensure other threads can't change it
- * while we're notifying the stack.
- */
adapter->ptp_tx_skb = NULL;
- clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
/* Notify the stack and free the skb after we've unlocked */
skb_tstamp_tx(skb, &shhwtstamps);
@@ -690,27 +757,25 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
}
/**
- * igc_ptp_tx_work
- * @work: pointer to work struct
+ * igc_ptp_tx_tstamp_event
+ * @adapter: board private structure
*
- * This work function polls the TSYNCTXCTL valid bit to determine when a
- * timestamp has been taken for the current stored skb.
+ * Called when a TX timestamp interrupt happens to retrieve the
+ * timestamp and send it up to the socket.
*/
-static void igc_ptp_tx_work(struct work_struct *work)
+void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter)
{
- struct igc_adapter *adapter = container_of(work, struct igc_adapter,
- ptp_tx_work);
- struct igc_hw *hw = &adapter->hw;
- u32 tsynctxctl;
+ unsigned long flags;
- if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
- return;
+ spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
- tsynctxctl = rd32(IGC_TSYNCTXCTL);
- if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0)))
- return;
+ if (!adapter->ptp_tx_skb)
+ goto unlock;
igc_ptp_tx_hwtstamp(adapter);
+
+unlock:
+ spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
}
/**
@@ -959,8 +1024,8 @@ void igc_ptp_init(struct igc_adapter *adapter)
return;
}
+ spin_lock_init(&adapter->ptp_tx_lock);
spin_lock_init(&adapter->tmreg_lock);
- INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work);
adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
@@ -1020,10 +1085,7 @@ void igc_ptp_suspend(struct igc_adapter *adapter)
if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
return;
- cancel_work_sync(&adapter->ptp_tx_work);
- dev_kfree_skb_any(adapter->ptp_tx_skb);
- adapter->ptp_tx_skb = NULL;
- clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
+ igc_ptp_clear_tx_tstamp(adapter);
if (pci_device_is_present(adapter->pdev)) {
igc_ptp_time_save(adapter);